Coverage Report

Created: 2025-08-03 09:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/data/zyw/opt-ci/actions-runner/_work/llvm-opt-benchmark/llvm-opt-benchmark/llvm/llvm-project/llvm/lib/Analysis/ValueTracking.cpp
Line
Count
Source
1
//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains routines that help analyze properties that chains of
10
// computations have.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Analysis/ValueTracking.h"
15
#include "llvm/ADT/APFloat.h"
16
#include "llvm/ADT/APInt.h"
17
#include "llvm/ADT/ArrayRef.h"
18
#include "llvm/ADT/FloatingPointMode.h"
19
#include "llvm/ADT/STLExtras.h"
20
#include "llvm/ADT/ScopeExit.h"
21
#include "llvm/ADT/SmallPtrSet.h"
22
#include "llvm/ADT/SmallSet.h"
23
#include "llvm/ADT/SmallVector.h"
24
#include "llvm/ADT/StringRef.h"
25
#include "llvm/ADT/iterator_range.h"
26
#include "llvm/Analysis/AliasAnalysis.h"
27
#include "llvm/Analysis/AssumeBundleQueries.h"
28
#include "llvm/Analysis/AssumptionCache.h"
29
#include "llvm/Analysis/ConstantFolding.h"
30
#include "llvm/Analysis/DomConditionCache.h"
31
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
32
#include "llvm/Analysis/GuardUtils.h"
33
#include "llvm/Analysis/InstructionSimplify.h"
34
#include "llvm/Analysis/Loads.h"
35
#include "llvm/Analysis/LoopInfo.h"
36
#include "llvm/Analysis/TargetLibraryInfo.h"
37
#include "llvm/Analysis/VectorUtils.h"
38
#include "llvm/Analysis/WithCache.h"
39
#include "llvm/IR/Argument.h"
40
#include "llvm/IR/Attributes.h"
41
#include "llvm/IR/BasicBlock.h"
42
#include "llvm/IR/Constant.h"
43
#include "llvm/IR/ConstantRange.h"
44
#include "llvm/IR/Constants.h"
45
#include "llvm/IR/DerivedTypes.h"
46
#include "llvm/IR/DiagnosticInfo.h"
47
#include "llvm/IR/Dominators.h"
48
#include "llvm/IR/EHPersonalities.h"
49
#include "llvm/IR/Function.h"
50
#include "llvm/IR/GetElementPtrTypeIterator.h"
51
#include "llvm/IR/GlobalAlias.h"
52
#include "llvm/IR/GlobalValue.h"
53
#include "llvm/IR/GlobalVariable.h"
54
#include "llvm/IR/InstrTypes.h"
55
#include "llvm/IR/Instruction.h"
56
#include "llvm/IR/Instructions.h"
57
#include "llvm/IR/IntrinsicInst.h"
58
#include "llvm/IR/Intrinsics.h"
59
#include "llvm/IR/IntrinsicsAArch64.h"
60
#include "llvm/IR/IntrinsicsAMDGPU.h"
61
#include "llvm/IR/IntrinsicsRISCV.h"
62
#include "llvm/IR/IntrinsicsX86.h"
63
#include "llvm/IR/LLVMContext.h"
64
#include "llvm/IR/Metadata.h"
65
#include "llvm/IR/Module.h"
66
#include "llvm/IR/Operator.h"
67
#include "llvm/IR/PatternMatch.h"
68
#include "llvm/IR/Type.h"
69
#include "llvm/IR/User.h"
70
#include "llvm/IR/Value.h"
71
#include "llvm/Support/Casting.h"
72
#include "llvm/Support/CommandLine.h"
73
#include "llvm/Support/Compiler.h"
74
#include "llvm/Support/ErrorHandling.h"
75
#include "llvm/Support/KnownBits.h"
76
#include "llvm/Support/KnownFPClass.h"
77
#include "llvm/Support/MathExtras.h"
78
#include "llvm/TargetParser/RISCVTargetParser.h"
79
#include <algorithm>
80
#include <cassert>
81
#include <cstdint>
82
#include <optional>
83
#include <utility>
84
85
using namespace llvm;
86
using namespace llvm::PatternMatch;
87
88
// Controls the number of uses of the value searched for possible
89
// dominating comparisons.
90
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
91
                                              cl::Hidden, cl::init(20));
92
93
94
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
95
/// returns the element type's bitwidth.
96
1.58G
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
97
1.58G
  if (unsigned BitWidth = Ty->getScalarSizeInBits())
98
1.00G
    return BitWidth;
99
100
585M
  return DL.getPointerTypeSizeInBits(Ty);
101
1.58G
}
102
103
// Given the provided Value and, potentially, a context instruction, return
104
// the preferred context instruction (if any).
105
436M
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
106
  // If we've been provided with a context instruction, then use that (provided
107
  // it has been inserted).
108
436M
  if (CxtI && 
CxtI->getParent()301M
)
109
301M
    return CxtI;
110
111
  // If the value is really an already-inserted instruction, then use that.
112
135M
  CxtI = dyn_cast<Instruction>(V);
113
135M
  if (CxtI && 
CxtI->getParent()112M
)
114
112M
    return CxtI;
115
116
22.2M
  return nullptr;
117
135M
}
118
119
static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
120
                                   const APInt &DemandedElts,
121
1.76M
                                   APInt &DemandedLHS, APInt &DemandedRHS) {
122
1.76M
  if (isa<ScalableVectorType>(Shuf->getType())) {
123
0
    assert(DemandedElts == APInt(1,1));
124
0
    DemandedLHS = DemandedRHS = DemandedElts;
125
0
    return true;
126
0
  }
127
128
1.76M
  int NumElts =
129
1.76M
      cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
130
1.76M
  return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
131
1.76M
                                      DemandedElts, DemandedLHS, DemandedRHS);
132
1.76M
}
133
134
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
135
                             KnownBits &Known, const SimplifyQuery &Q,
136
                             unsigned Depth);
137
138
void llvm::computeKnownBits(const Value *V, KnownBits &Known,
139
1.85G
                            const SimplifyQuery &Q, unsigned Depth) {
140
  // Since the number of lanes in a scalable vector is unknown at compile time,
141
  // we track one bit which is implicitly broadcast to all lanes.  This means
142
  // that all lanes in a scalable vector are considered demanded.
143
1.85G
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
144
1.85G
  APInt DemandedElts =
145
1.85G
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())4.59M
:
APInt(1, 1)1.84G
;
146
1.85G
  ::computeKnownBits(V, DemandedElts, Known, Q, Depth);
147
1.85G
}
148
149
void llvm::computeKnownBits(const Value *V, KnownBits &Known,
150
                            const DataLayout &DL, AssumptionCache *AC,
151
                            const Instruction *CxtI, const DominatorTree *DT,
152
20.9M
                            bool UseInstrInfo, unsigned Depth) {
153
20.9M
  computeKnownBits(V, Known,
154
20.9M
                   SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo),
155
20.9M
                   Depth);
156
20.9M
}
157
158
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
159
                                 AssumptionCache *AC, const Instruction *CxtI,
160
                                 const DominatorTree *DT, bool UseInstrInfo,
161
302M
                                 unsigned Depth) {
162
302M
  return computeKnownBits(
163
302M
      V, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
164
302M
}
165
166
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
167
                                 const DataLayout &DL, AssumptionCache *AC,
168
                                 const Instruction *CxtI,
169
                                 const DominatorTree *DT, bool UseInstrInfo,
170
12.0k
                                 unsigned Depth) {
171
12.0k
  return computeKnownBits(
172
12.0k
      V, DemandedElts,
173
12.0k
      SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
174
12.0k
}
175
176
static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
177
148M
                                            const SimplifyQuery &SQ) {
178
  // Look for an inverted mask: (X & ~M) op (Y & M).
179
148M
  {
180
148M
    Value *M;
181
148M
    if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
182
148M
        
match(RHS, m_c_And(m_Specific(M), m_Value()))43.6k
&&
183
148M
        
isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT)11.3k
)
184
1.04k
      return true;
185
148M
  }
186
187
  // X op (Y & ~X)
188
148M
  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
189
148M
      
isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)870
)
190
414
    return true;
191
192
  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
193
  // for constant Y.
194
148M
  Value *Y;
195
148M
  if (match(RHS,
196
148M
            m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
197
148M
      
isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT)143
&&
198
148M
      
isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)11
)
199
11
    return true;
200
201
  // Peek through extends to find a 'not' of the other side:
202
  // (ext Y) op ext(~Y)
203
148M
  if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
204
148M
      
match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y))))4.34M
&&
205
148M
      
isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT)0
)
206
0
    return true;
207
208
  // Look for: (A & B) op ~(A | B)
209
148M
  {
210
148M
    Value *A, *B;
211
148M
    if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
212
148M
        
match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B))))3.07M
&&
213
148M
        
isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT)35
&&
214
148M
        
isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT)25
)
215
24
      return true;
216
148M
  }
217
218
  // Look for: (X << V) op (Y >> (BitWidth - V))
219
  // or        (X >> V) op (Y << (BitWidth - V))
220
148M
  {
221
148M
    const Value *V;
222
148M
    const APInt *R;
223
148M
    if (((match(RHS, m_Shl(m_Value(), m_Sub(m_APInt(R), m_Value(V)))) &&
224
148M
          
match(LHS, m_LShr(m_Value(), m_Specific(V)))14.4k
) ||
225
148M
         
(148M
match(RHS, m_LShr(m_Value(), m_Sub(m_APInt(R), m_Value(V))))148M
&&
226
148M
          
match(LHS, m_Shl(m_Value(), m_Specific(V)))31.9k
)) &&
227
148M
        
R->uge(LHS->getType()->getScalarSizeInBits())12.9k
)
228
1.28k
      return true;
229
148M
  }
230
231
148M
  return false;
232
148M
}
233
234
bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
235
                               const WithCache<const Value *> &RHSCache,
236
74.4M
                               const SimplifyQuery &SQ) {
237
74.4M
  const Value *LHS = LHSCache.getValue();
238
74.4M
  const Value *RHS = RHSCache.getValue();
239
240
74.4M
  assert(LHS->getType() == RHS->getType() &&
241
74.4M
         "LHS and RHS should have the same type");
242
74.4M
  assert(LHS->getType()->isIntOrIntVectorTy() &&
243
74.4M
         "LHS and RHS should be integers");
244
245
74.4M
  if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
246
74.4M
      
haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ)74.4M
)
247
2.77k
    return true;
248
249
74.4M
  return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
250
74.4M
                                        RHSCache.getKnownBits(SQ));
251
74.4M
}
252
253
65.7k
bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
254
66.1k
  return 
!I->user_empty()65.7k
&&
all_of(I->users(), [](const User *U) 65.7k
{
255
66.1k
    return match(U, m_ICmp(m_Value(), m_Zero()));
256
66.1k
  });
257
65.7k
}
258
259
2.10M
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
260
2.18M
  return 
!I->user_empty()2.10M
&&
all_of(I->users(), [](const User *U) 2.10M
{
261
2.18M
    CmpPredicate P;
262
2.18M
    return match(U, m_ICmp(P, m_Value(), m_Zero())) && 
ICmpInst::isEquality(P)223k
;
263
2.18M
  });
264
2.10M
}
265
266
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
267
                                  bool OrZero, AssumptionCache *AC,
268
                                  const Instruction *CxtI,
269
                                  const DominatorTree *DT, bool UseInstrInfo,
270
1.46M
                                  unsigned Depth) {
271
1.46M
  return ::isKnownToBeAPowerOfTwo(
272
1.46M
      V, OrZero, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo),
273
1.46M
      Depth);
274
1.46M
}
275
276
static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
277
                           const SimplifyQuery &Q, unsigned Depth);
278
279
bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
280
107M
                              unsigned Depth) {
281
107M
  return computeKnownBits(V, SQ, Depth).isNonNegative();
282
107M
}
283
284
bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
285
73.8k
                           unsigned Depth) {
286
73.8k
  if (auto *CI = dyn_cast<ConstantInt>(V))
287
1.26k
    return CI->getValue().isStrictlyPositive();
288
289
  // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
290
  // this updated.
291
72.5k
  KnownBits Known = computeKnownBits(V, SQ, Depth);
292
72.5k
  return Known.isNonNegative() &&
293
72.5k
         
(10.3k
Known.isNonZero()10.3k
||
isKnownNonZero(V, SQ, Depth)10.3k
);
294
73.8k
}
295
296
bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
297
287
                           unsigned Depth) {
298
287
  return computeKnownBits(V, SQ, Depth).isNegative();
299
287
}
300
301
static bool isKnownNonEqual(const Value *V1, const Value *V2,
302
                            const APInt &DemandedElts, const SimplifyQuery &Q,
303
                            unsigned Depth);
304
305
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
306
186M
                           const SimplifyQuery &Q, unsigned Depth) {
307
  // We don't support looking through casts.
308
186M
  if (V1 == V2 || 
V1->getType() != V2->getType()186M
)
309
21
    return false;
310
186M
  auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
311
186M
  APInt DemandedElts =
312
186M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())773k
:
APInt(1, 1)185M
;
313
186M
  return ::isKnownNonEqual(V1, V2, DemandedElts, Q, Depth);
314
186M
}
315
316
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
317
18.1M
                             const SimplifyQuery &SQ, unsigned Depth) {
318
18.1M
  KnownBits Known(Mask.getBitWidth());
319
18.1M
  computeKnownBits(V, Known, SQ, Depth);
320
18.1M
  return Mask.isSubsetOf(Known.Zero);
321
18.1M
}
322
323
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
324
                                   const SimplifyQuery &Q, unsigned Depth);
325
326
static unsigned ComputeNumSignBits(const Value *V, const SimplifyQuery &Q,
327
219M
                                   unsigned Depth = 0) {
328
219M
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
329
219M
  APInt DemandedElts =
330
219M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())516k
:
APInt(1, 1)218M
;
331
219M
  return ComputeNumSignBits(V, DemandedElts, Q, Depth);
332
219M
}
333
334
unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
335
                                  AssumptionCache *AC, const Instruction *CxtI,
336
                                  const DominatorTree *DT, bool UseInstrInfo,
337
112M
                                  unsigned Depth) {
338
112M
  return ::ComputeNumSignBits(
339
112M
      V, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo), Depth);
340
112M
}
341
342
unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
343
                                         AssumptionCache *AC,
344
                                         const Instruction *CxtI,
345
                                         const DominatorTree *DT,
346
29.1M
                                         unsigned Depth) {
347
29.1M
  unsigned SignBits = ComputeNumSignBits(V, DL, AC, CxtI, DT, Depth);
348
29.1M
  return V->getType()->getScalarSizeInBits() - SignBits + 1;
349
29.1M
}
350
351
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
352
                                   bool NSW, bool NUW,
353
                                   const APInt &DemandedElts,
354
                                   KnownBits &KnownOut, KnownBits &Known2,
355
384M
                                   const SimplifyQuery &Q, unsigned Depth) {
356
384M
  computeKnownBits(Op1, DemandedElts, KnownOut, Q, Depth + 1);
357
358
  // If one operand is unknown and we have no nowrap information,
359
  // the result will be unknown independently of the second operand.
360
384M
  if (KnownOut.isUnknown() && 
!NSW189M
&&
!NUW147M
)
361
141M
    return;
362
363
243M
  computeKnownBits(Op0, DemandedElts, Known2, Q, Depth + 1);
364
243M
  KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
365
366
243M
  if (!Add && 
NSW32.3M
&&
!KnownOut.isNonNegative()26.0M
&&
367
243M
      isImpliedByDomCondition(ICmpInst::ICMP_SLE, Op1, Op0, Q.CxtI, Q.DL)
368
23.3M
          .value_or(false))
369
293k
    KnownOut.makeNonNegative();
370
243M
}
371
372
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
373
                                bool NUW, const APInt &DemandedElts,
374
                                KnownBits &Known, KnownBits &Known2,
375
44.4M
                                const SimplifyQuery &Q, unsigned Depth) {
376
44.4M
  computeKnownBits(Op1, DemandedElts, Known, Q, Depth + 1);
377
44.4M
  computeKnownBits(Op0, DemandedElts, Known2, Q, Depth + 1);
378
379
44.4M
  bool isKnownNegative = false;
380
44.4M
  bool isKnownNonNegative = false;
381
  // If the multiplication is known not to overflow, compute the sign bit.
382
44.4M
  if (NSW) {
383
19.2M
    if (Op0 == Op1) {
384
      // The product of a number with itself is non-negative.
385
394k
      isKnownNonNegative = true;
386
18.8M
    } else {
387
18.8M
      bool isKnownNonNegativeOp1 = Known.isNonNegative();
388
18.8M
      bool isKnownNonNegativeOp0 = Known2.isNonNegative();
389
18.8M
      bool isKnownNegativeOp1 = Known.isNegative();
390
18.8M
      bool isKnownNegativeOp0 = Known2.isNegative();
391
      // The product of two numbers with the same sign is non-negative.
392
18.8M
      isKnownNonNegative = (isKnownNegativeOp1 && 
isKnownNegativeOp01.05M
) ||
393
18.8M
                           
(18.8M
isKnownNonNegativeOp118.8M
&&
isKnownNonNegativeOp011.5M
);
394
18.8M
      if (!isKnownNonNegative && 
NUW14.1M
) {
395
        // mul nuw nsw with a factor > 1 is non-negative.
396
1.37M
        KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
397
1.37M
        isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
398
1.37M
                             
KnownBits::sgt(Known2, One).value_or(false)493k
;
399
1.37M
      }
400
401
      // The product of a negative number and a non-negative number is either
402
      // negative or zero.
403
18.8M
      if (!isKnownNonNegative)
404
13.2M
        isKnownNegative =
405
13.2M
            (isKnownNegativeOp1 && 
isKnownNonNegativeOp01.05M
&&
406
13.2M
             
Known2.isNonZero()240k
) ||
407
13.2M
            
(13.2M
isKnownNegativeOp013.2M
&&
isKnownNonNegativeOp15.62k
&&
Known.isNonZero()3.03k
);
408
18.8M
    }
409
19.2M
  }
410
411
44.4M
  bool SelfMultiply = Op0 == Op1;
412
44.4M
  if (SelfMultiply)
413
1.05M
    SelfMultiply &=
414
1.05M
        isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
415
44.4M
  Known = KnownBits::mul(Known, Known2, SelfMultiply);
416
417
  // Only make use of no-wrap flags if we failed to compute the sign bit
418
  // directly.  This matters if the multiplication always overflows, in
419
  // which case we prefer to follow the result of the direct computation,
420
  // though as the program is invoking undefined behaviour we can choose
421
  // whatever we like here.
422
44.4M
  if (isKnownNonNegative && 
!Known.isNegative()5.98M
)
423
5.98M
    Known.makeNonNegative();
424
38.4M
  else if (isKnownNegative && 
!Known.isNonNegative()2.34k
)
425
2.34k
    Known.makeNegative();
426
44.4M
}
427
428
void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
429
43.1M
                                             KnownBits &Known) {
430
43.1M
  unsigned BitWidth = Known.getBitWidth();
431
43.1M
  unsigned NumRanges = Ranges.getNumOperands() / 2;
432
43.1M
  assert(NumRanges >= 1);
433
434
43.1M
  Known.Zero.setAllBits();
435
43.1M
  Known.One.setAllBits();
436
437
86.3M
  for (unsigned i = 0; i < NumRanges; 
++i43.1M
) {
438
43.1M
    ConstantInt *Lower =
439
43.1M
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
440
43.1M
    ConstantInt *Upper =
441
43.1M
        mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
442
43.1M
    ConstantRange Range(Lower->getValue(), Upper->getValue());
443
    // BitWidth must equal the Ranges BitWidth for the correct number of high
444
    // bits to be set.
445
43.1M
    assert(BitWidth == Range.getBitWidth() &&
446
43.1M
           "Known bit width must match range bit width!");
447
448
    // The first CommonPrefixBits of all values in Range are equal.
449
43.1M
    unsigned CommonPrefixBits =
450
43.1M
        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
451
43.1M
    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
452
43.1M
    APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
453
43.1M
    Known.One &= UnsignedMax & Mask;
454
43.1M
    Known.Zero &= ~UnsignedMax & Mask;
455
43.1M
  }
456
43.1M
}
457
458
77.2M
static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
459
77.2M
  SmallVector<const Instruction *, 16> WorkSet(1, I);
460
77.2M
  SmallPtrSet<const Instruction *, 32> Visited;
461
77.2M
  SmallPtrSet<const Instruction *, 16> EphValues;
462
463
  // The instruction defining an assumption's condition itself is always
464
  // considered ephemeral to that assumption (even if it has other
465
  // non-ephemeral users). See r246696's test case for an example.
466
77.2M
  if (is_contained(I->operands(), E))
467
72.1M
    return true;
468
469
19.1M
  
while (5.13M
!WorkSet.empty()) {
470
14.7M
    const Instruction *V = WorkSet.pop_back_val();
471
14.7M
    if (!Visited.insert(V).second)
472
202
      continue;
473
474
    // If all uses of this value are ephemeral, then so is this value.
475
14.7M
    if (all_of(V->users(), [&](const User *U) {
476
10.4M
          return EphValues.count(cast<Instruction>(U));
477
11.2M
        })) {
478
11.2M
      if (V == E)
479
749k
        return true;
480
481
10.5M
      if (V == I || 
(5.39M
!V->mayHaveSideEffects()5.39M
&&
!V->isTerminator()5.39M
)) {
482
10.5M
        EphValues.insert(V);
483
484
10.5M
        if (const User *U = dyn_cast<User>(V)) {
485
20.9M
          for (const Use &U : U->operands()) {
486
20.9M
            if (const auto *I = dyn_cast<Instruction>(U.get()))
487
9.68M
              WorkSet.push_back(I);
488
20.9M
          }
489
10.5M
        }
490
10.5M
      }
491
10.5M
    }
492
14.7M
  }
493
494
4.38M
  return false;
495
5.13M
}
496
497
// Is this an intrinsic that cannot be speculated but also cannot trap?
498
40.9M
bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
499
40.9M
  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
500
38.9M
    return CI->isAssumeLikeIntrinsic();
501
502
2.03M
  return false;
503
40.9M
}
504
505
bool llvm::isValidAssumeForContext(const Instruction *Inv,
506
                                   const Instruction *CxtI,
507
                                   const DominatorTree *DT,
508
153M
                                   bool AllowEphemerals) {
509
  // There are two restrictions on the use of an assume:
510
  //  1. The assume must dominate the context (or the control flow must
511
  //     reach the assume whenever it reaches the context).
512
  //  2. The context must not be in the assume's set of ephemeral values
513
  //     (otherwise we will use the assume to prove that the condition
514
  //     feeding the assume is trivially true, thus causing the removal of
515
  //     the assume).
516
517
153M
  if (Inv->getParent() == CxtI->getParent()) {
518
    // If Inv and CtxI are in the same block, check if the assume (Inv) is first
519
    // in the BB.
520
120M
    if (Inv->comesBefore(CxtI))
521
11.0M
      return true;
522
523
    // Don't let an assume affect itself - this would cause the problems
524
    // `isEphemeralValueOf` is trying to prevent, and it would also make
525
    // the loop below go out of bounds.
526
109M
    if (!AllowEphemerals && 
Inv == CxtI109M
)
527
31.6M
      return false;
528
529
    // The context comes first, but they're both in the same block.
530
    // Make sure there is nothing in between that might interrupt
531
    // the control flow, not even CxtI itself.
532
    // We limit the scan distance between the assume and its context instruction
533
    // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
534
    // it can be adjusted if needed (could be turned into a cl::opt).
535
77.6M
    auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
536
77.6M
    if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
537
397k
      return false;
538
539
77.2M
    return AllowEphemerals || 
!isEphemeralValueOf(Inv, CxtI)77.2M
;
540
77.6M
  }
541
542
  // Inv and CxtI are in different blocks.
543
32.8M
  if (DT) {
544
31.2M
    if (DT->dominates(Inv, CxtI))
545
5.06M
      return true;
546
31.2M
  } else 
if (1.50M
Inv->getParent() == CxtI->getParent()->getSinglePredecessor()1.50M
||
547
1.50M
             
Inv->getParent()->isEntryBlock()488k
) {
548
    // We don't have a DT, but this trivially dominates.
549
1.04M
    return true;
550
1.04M
  }
551
552
26.6M
  return false;
553
32.8M
}
554
555
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
556
// we still have enough information about `RHS` to conclude non-zero. For
557
// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
558
// so the extra compile time may not be worth it, but possibly a second API
559
// should be created for use outside of loops.
560
458M
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
561
  // v u> y implies v != 0.
562
458M
  if (Pred == ICmpInst::ICMP_UGT)
563
9.68M
    return true;
564
565
  // Special-case v != 0 to also handle v != null.
566
448M
  if (Pred == ICmpInst::ICMP_NE)
567
228M
    return match(RHS, m_Zero());
568
569
  // All other predicates - rely on generic ConstantRange handling.
570
220M
  const APInt *C;
571
220M
  auto Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
572
220M
  if (match(RHS, m_APInt(C))) {
573
120M
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
574
120M
    return !TrueValues.contains(Zero);
575
120M
  }
576
577
100M
  auto *VC = dyn_cast<ConstantDataVector>(RHS);
578
100M
  if (VC == nullptr)
579
100M
    return false;
580
581
0
  for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
582
0
       ++ElemIdx) {
583
0
    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
584
0
        Pred, VC->getElementAsAPInt(ElemIdx));
585
0
    if (TrueValues.contains(Zero))
586
0
      return false;
587
0
  }
588
0
  return true;
589
0
}
590
591
static void breakSelfRecursivePHI(const Use *U, const PHINode *PHI,
592
                                  Value *&ValOut, Instruction *&CtxIOut,
593
394M
                                  const PHINode **PhiOut = nullptr) {
594
394M
  ValOut = U->get();
595
394M
  if (ValOut == PHI)
596
401k
    return;
597
394M
  CtxIOut = PHI->getIncomingBlock(*U)->getTerminator();
598
394M
  if (PhiOut)
599
393M
    *PhiOut = PHI;
600
394M
  Value *V;
601
  // If the Use is a select of this phi, compute analysis on other arm to break
602
  // recursion.
603
  // TODO: Min/Max
604
394M
  if (match(ValOut, m_Select(m_Value(), m_Specific(PHI), m_Value(V))) ||
605
394M
      
match(ValOut, m_Select(m_Value(), m_Value(V), m_Specific(PHI)))393M
)
606
3.06M
    ValOut = V;
607
608
  // Same for select, if this phi is 2-operand phi, compute analysis on other
609
  // incoming value to break recursion.
610
  // TODO: We could handle any number of incoming edges as long as we only have
611
  // two unique values.
612
394M
  if (auto *IncPhi = dyn_cast<PHINode>(ValOut);
613
394M
      IncPhi && 
IncPhi->getNumIncomingValues() == 257.9M
) {
614
113M
    for (int Idx = 0; Idx < 2; 
++Idx68.8M
) {
615
84.1M
      if (IncPhi->getIncomingValue(Idx) == PHI) {
616
15.3M
        ValOut = IncPhi->getIncomingValue(1 - Idx);
617
15.3M
        if (PhiOut)
618
15.2M
          *PhiOut = IncPhi;
619
15.3M
        CtxIOut = IncPhi->getIncomingBlock(1 - Idx)->getTerminator();
620
15.3M
        break;
621
15.3M
      }
622
84.1M
    }
623
44.2M
  }
624
394M
}
625
626
612M
static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
627
  // Use of assumptions is context-sensitive. If we don't have a context, we
628
  // cannot use them!
629
612M
  if (!Q.AC || 
!Q.CxtI392M
)
630
222M
    return false;
631
632
389M
  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
633
18.1M
    if (!Elem.Assume)
634
2.08M
      continue;
635
636
16.0M
    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
637
16.0M
    assert(I->getFunction() == Q.CxtI->getFunction() &&
638
16.0M
           "Got assumption for the wrong function!");
639
640
16.0M
    if (Elem.Index != AssumptionCache::ExprResultIdx) {
641
21.4k
      if (!V->getType()->isPointerTy())
642
0
        continue;
643
21.4k
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
644
21.4k
              *I, I->bundle_op_info_begin()[Elem.Index])) {
645
21.4k
        if (RK.WasOn == V &&
646
21.4k
            
(21.0k
RK.AttrKind == Attribute::NonNull21.0k
||
647
21.0k
             (RK.AttrKind == Attribute::Dereferenceable &&
648
21.0k
              !NullPointerIsDefined(Q.CxtI->getFunction(),
649
0
                                    V->getType()->getPointerAddressSpace()))) &&
650
21.4k
            
isValidAssumeForContext(I, Q.CxtI, Q.DT)0
)
651
0
          return true;
652
21.4k
      }
653
21.4k
      continue;
654
21.4k
    }
655
656
    // Warning: This loop can end up being somewhat performance sensitive.
657
    // We're running this loop for once for each value queried resulting in a
658
    // runtime of ~O(#assumes * #values).
659
660
16.0M
    Value *RHS;
661
16.0M
    CmpPredicate Pred;
662
16.0M
    auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
663
16.0M
    if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
664
398k
      continue;
665
666
15.6M
    if (cmpExcludesZero(Pred, RHS) && 
isValidAssumeForContext(I, Q.CxtI, Q.DT)14.4M
)
667
817k
      return true;
668
15.6M
  }
669
670
389M
  return false;
671
389M
}
672
673
static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
674
                                    Value *LHS, Value *RHS, KnownBits &Known,
675
256M
                                    const SimplifyQuery &Q) {
676
256M
  if (RHS->getType()->isPointerTy()) {
677
    // Handle comparison of pointer to null explicitly, as it will not be
678
    // covered by the m_APInt() logic below.
679
27.8M
    if (LHS == V && 
match(RHS, m_Zero())19.3M
) {
680
10.7M
      switch (Pred) {
681
33.1k
      case ICmpInst::ICMP_EQ:
682
33.1k
        Known.setAllZero();
683
33.1k
        break;
684
26.9k
      case ICmpInst::ICMP_SGE:
685
27.5k
      case ICmpInst::ICMP_SGT:
686
27.5k
        Known.makeNonNegative();
687
27.5k
        break;
688
3.63k
      case ICmpInst::ICMP_SLT:
689
3.63k
        Known.makeNegative();
690
3.63k
        break;
691
10.7M
      default:
692
10.7M
        break;
693
10.7M
      }
694
10.7M
    }
695
27.8M
    return;
696
27.8M
  }
697
698
228M
  unsigned BitWidth = Known.getBitWidth();
699
228M
  auto m_V =
700
228M
      m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
701
702
228M
  Value *Y;
703
228M
  const APInt *Mask, *C;
704
228M
  if (!match(RHS, m_APInt(C)))
705
32.1M
    return;
706
707
196M
  uint64_t ShAmt;
708
196M
  switch (Pred) {
709
14.2M
  case ICmpInst::ICMP_EQ:
710
    // assume(V = C)
711
14.2M
    if (match(LHS, m_V)) {
712
645k
      Known = Known.unionWith(KnownBits::makeConstant(*C));
713
      // assume(V & Mask = C)
714
13.6M
    } else if (match(LHS, m_c_And(m_V, m_Value(Y)))) {
715
      // For one bits in Mask, we can propagate bits from C to V.
716
6.09M
      Known.One |= *C;
717
6.09M
      if (match(Y, m_APInt(Mask)))
718
5.66M
        Known.Zero |= ~*C & *Mask;
719
      // assume(V | Mask = C)
720
7.54M
    } else if (match(LHS, m_c_Or(m_V, m_Value(Y)))) {
721
      // For zero bits in Mask, we can propagate bits from C to V.
722
4.08k
      Known.Zero |= ~*C;
723
4.08k
      if (match(Y, m_APInt(Mask)))
724
26
        Known.One |= *C & ~*Mask;
725
      // assume(V << ShAmt = C)
726
7.53M
    } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
727
7.53M
               
ShAmt < BitWidth4.56k
) {
728
      // For those bits in C that are known, we can propagate them to known
729
      // bits in V shifted to the right by ShAmt.
730
4.56k
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
731
4.56k
      RHSKnown.Zero.lshrInPlace(ShAmt);
732
4.56k
      RHSKnown.One.lshrInPlace(ShAmt);
733
4.56k
      Known = Known.unionWith(RHSKnown);
734
      // assume(V >> ShAmt = C)
735
7.53M
    } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
736
7.53M
               
ShAmt < BitWidth11.4k
) {
737
11.4k
      KnownBits RHSKnown = KnownBits::makeConstant(*C);
738
      // For those bits in RHS that are known, we can propagate them to known
739
      // bits in V shifted to the right by C.
740
11.4k
      Known.Zero |= RHSKnown.Zero << ShAmt;
741
11.4k
      Known.One |= RHSKnown.One << ShAmt;
742
11.4k
    }
743
14.2M
    break;
744
94.4M
  case ICmpInst::ICMP_NE: {
745
    // assume (V & B != 0) where B is a power of 2
746
94.4M
    const APInt *BPow2;
747
94.4M
    if (C->isZero() && 
match(LHS, m_And(m_V, m_Power2(BPow2)))58.5M
)
748
8.86M
      Known.One |= *BPow2;
749
94.4M
    break;
750
0
  }
751
87.4M
  default: {
752
87.4M
    const APInt *Offset = nullptr;
753
87.4M
    if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
754
69.4M
      ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
755
69.4M
      if (Offset)
756
6.73M
        LHSRange = LHSRange.sub(*Offset);
757
69.4M
      Known = Known.unionWith(LHSRange.toKnownBits());
758
69.4M
    }
759
87.4M
    if (Pred == ICmpInst::ICMP_UGT || 
Pred == ICmpInst::ICMP_UGE78.1M
) {
760
      // X & Y u> C     -> X u> C && Y u> C
761
      // X nuw- Y u> C  -> X u> C
762
27.2M
      if (match(LHS, m_c_And(m_V, m_Value())) ||
763
27.2M
          
match(LHS, m_NUWSub(m_V, m_Value()))26.8M
)
764
537k
        Known.One.setHighBits(
765
537k
            (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
766
27.2M
    }
767
87.4M
    if (Pred == ICmpInst::ICMP_ULT || 
Pred == ICmpInst::ICMP_ULE69.7M
) {
768
      // X | Y u< C    -> X u< C && Y u< C
769
      // X nuw+ Y u< C -> X u< C && Y u< C
770
26.1M
      if (match(LHS, m_c_Or(m_V, m_Value())) ||
771
26.1M
          
match(LHS, m_c_NUWAdd(m_V, m_Value()))26.0M
) {
772
582k
        Known.Zero.setHighBits(
773
582k
            (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
774
582k
      }
775
26.1M
    }
776
87.4M
  } break;
777
196M
  }
778
196M
}
779
780
static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
781
                                         KnownBits &Known,
782
256M
                                         const SimplifyQuery &SQ, bool Invert) {
783
256M
  ICmpInst::Predicate Pred =
784
256M
      Invert ? 
Cmp->getInversePredicate()165M
:
Cmp->getPredicate()90.5M
;
785
256M
  Value *LHS = Cmp->getOperand(0);
786
256M
  Value *RHS = Cmp->getOperand(1);
787
788
  // Handle icmp pred (trunc V), C
789
256M
  if (match(LHS, m_Trunc(m_Specific(V)))) {
790
1.80M
    KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
791
1.80M
    computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
792
1.80M
    if (cast<TruncInst>(LHS)->hasNoUnsignedWrap())
793
69.1k
      Known = Known.unionWith(DstKnown.zext(Known.getBitWidth()));
794
1.73M
    else
795
1.73M
      Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
796
1.80M
    return;
797
1.80M
  }
798
799
254M
  computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
800
254M
}
801
802
static void computeKnownBitsFromCond(const Value *V, Value *Cond,
803
                                     KnownBits &Known, const SimplifyQuery &SQ,
804
267M
                                     bool Invert, unsigned Depth) {
805
267M
  Value *A, *B;
806
267M
  if (Depth < MaxAnalysisRecursionDepth &&
807
267M
      
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))246M
) {
808
17.0M
    KnownBits Known2(Known.getBitWidth());
809
17.0M
    KnownBits Known3(Known.getBitWidth());
810
17.0M
    computeKnownBitsFromCond(V, A, Known2, SQ, Invert, Depth + 1);
811
17.0M
    computeKnownBitsFromCond(V, B, Known3, SQ, Invert, Depth + 1);
812
17.0M
    if (Invert ? 
match(Cond, m_LogicalOr(m_Value(), m_Value()))11.4M
813
17.0M
               : 
match(Cond, m_LogicalAnd(m_Value(), m_Value()))5.61M
)
814
9.46M
      Known2 = Known2.unionWith(Known3);
815
7.59M
    else
816
7.59M
      Known2 = Known2.intersectWith(Known3);
817
17.0M
    Known = Known.unionWith(Known2);
818
17.0M
    return;
819
17.0M
  }
820
821
250M
  if (auto *Cmp = dyn_cast<ICmpInst>(Cond)) {
822
240M
    computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
823
240M
    return;
824
240M
  }
825
826
9.62M
  if (match(Cond, m_Trunc(m_Specific(V)))) {
827
197k
    KnownBits DstKnown(1);
828
197k
    if (Invert) {
829
94.6k
      DstKnown.setAllZero();
830
102k
    } else {
831
102k
      DstKnown.setAllOnes();
832
102k
    }
833
197k
    if (cast<TruncInst>(Cond)->hasNoUnsignedWrap()) {
834
83.5k
      Known = Known.unionWith(DstKnown.zext(Known.getBitWidth()));
835
83.5k
      return;
836
83.5k
    }
837
113k
    Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
838
113k
    return;
839
197k
  }
840
841
9.42M
  if (Depth < MaxAnalysisRecursionDepth && 
match(Cond, m_Not(m_Value(A)))4.87M
)
842
54.5k
    computeKnownBitsFromCond(V, A, Known, SQ, !Invert, Depth + 1);
843
9.42M
}
844
845
void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
846
3.63G
                                       const SimplifyQuery &Q, unsigned Depth) {
847
  // Handle injected condition.
848
3.63G
  if (Q.CC && 
Q.CC->AffectedValues.contains(V)6.76M
)
849
2.03M
    computeKnownBitsFromCond(V, Q.CC->Cond, Known, Q, Q.CC->Invert, Depth);
850
851
3.63G
  if (!Q.CxtI)
852
124M
    return;
853
854
3.50G
  if (Q.DC && 
Q.DT1.73G
) {
855
    // Handle dominating conditions.
856
1.73G
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
857
328M
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
858
328M
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
859
48.1M
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Q,
860
48.1M
                                 /*Invert*/ false, Depth);
861
862
328M
      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
863
328M
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
864
125M
        computeKnownBitsFromCond(V, BI->getCondition(), Known, Q,
865
125M
                                 /*Invert*/ true, Depth);
866
328M
    }
867
868
1.73G
    if (Known.hasConflict())
869
213
      Known.resetAll();
870
1.73G
  }
871
872
3.50G
  if (!Q.AC)
873
83.8M
    return;
874
875
3.42G
  unsigned BitWidth = Known.getBitWidth();
876
877
  // Note that the patterns below need to be kept in sync with the code
878
  // in AssumptionCache::updateAffectedValues.
879
880
3.42G
  for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
881
73.2M
    if (!Elem.Assume)
882
7.24M
      continue;
883
884
65.9M
    AssumeInst *I = cast<AssumeInst>(Elem.Assume);
885
65.9M
    assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
886
65.9M
           "Got assumption for the wrong function!");
887
888
65.9M
    if (Elem.Index != AssumptionCache::ExprResultIdx) {
889
749k
      if (!V->getType()->isPointerTy())
890
0
        continue;
891
749k
      if (RetainedKnowledge RK = getKnowledgeFromBundle(
892
749k
              *I, I->bundle_op_info_begin()[Elem.Index])) {
893
        // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
894
        // be the producer of the pointer in the bundle. At the moment, align
895
        // assumptions aren't optimized away.
896
749k
        if (RK.WasOn == V && 
RK.AttrKind == Attribute::Alignment730k
&&
897
749k
            
isPowerOf2_64(RK.ArgValue)730k
&&
898
749k
            
isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true)730k
)
899
73.3k
          Known.Zero.setLowBits(Log2_64(RK.ArgValue));
900
749k
      }
901
749k
      continue;
902
749k
    }
903
904
    // Warning: This loop can end up being somewhat performance sensitive.
905
    // We're running this loop for once for each value queried resulting in a
906
    // runtime of ~O(#assumes * #values).
907
908
65.2M
    Value *Arg = I->getArgOperand(0);
909
910
65.2M
    if (Arg == V && 
isValidAssumeForContext(I, Q.CxtI, Q.DT)15.9M
) {
911
637
      assert(BitWidth == 1 && "assume operand is not i1?");
912
637
      (void)BitWidth;
913
637
      Known.setAllOnes();
914
637
      return;
915
637
    }
916
65.2M
    if (match(Arg, m_Not(m_Specific(V))) &&
917
65.2M
        
isValidAssumeForContext(I, Q.CxtI, Q.DT)153k
) {
918
34
      assert(BitWidth == 1 && "assume operand is not i1?");
919
34
      (void)BitWidth;
920
34
      Known.setAllZero();
921
34
      return;
922
34
    }
923
65.2M
    auto *Trunc = dyn_cast<TruncInst>(Arg);
924
65.2M
    if (Trunc && 
Trunc->getOperand(0) == V93.2k
&&
925
65.2M
        
isValidAssumeForContext(I, Q.CxtI, Q.DT)62.6k
) {
926
230
      if (Trunc->hasNoUnsignedWrap()) {
927
224
        Known = KnownBits::makeConstant(APInt(BitWidth, 1));
928
224
        return;
929
224
      }
930
6
      Known.One.setBit(0);
931
6
      return;
932
230
    }
933
934
    // The remaining tests are all recursive, so bail out if we hit the limit.
935
65.2M
    if (Depth == MaxAnalysisRecursionDepth)
936
0
      continue;
937
938
65.2M
    ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
939
65.2M
    if (!Cmp)
940
441k
      continue;
941
942
64.7M
    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
943
49.1M
      continue;
944
945
15.6M
    computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
946
15.6M
  }
947
948
  // Conflicting assumption: Undefined behavior will occur on this execution
949
  // path.
950
3.42G
  if (Known.hasConflict())
951
14
    Known.resetAll();
952
3.42G
}
953
954
/// Compute known bits from a shift operator, including those with a
955
/// non-constant shift amount. Known is the output of this function. Known2 is a
956
/// pre-allocated temporary with the same bit width as Known and on return
957
/// contains the known bit of the shift value source. KF is an
958
/// operator-specific function that, given the known-bits and a shift amount,
959
/// compute the implied known-bits of the shift operator's result respectively
960
/// for that shift amount. The results from calling KF are conservatively
961
/// combined for all permitted shift amounts.
962
static void computeKnownBitsFromShiftOperator(
963
    const Operator *I, const APInt &DemandedElts, KnownBits &Known,
964
    KnownBits &Known2, const SimplifyQuery &Q, unsigned Depth,
965
181M
    function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
966
181M
  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
967
181M
  computeKnownBits(I->getOperand(1), DemandedElts, Known, Q, Depth + 1);
968
  // To limit compile-time impact, only query isKnownNonZero() if we know at
969
  // least something about the shift amount.
970
181M
  bool ShAmtNonZero =
971
181M
      Known.isNonZero() ||
972
181M
      
(26.2M
Known.getMaxValue().ult(Known.getBitWidth())26.2M
&&
973
26.2M
       
isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1)8.28M
);
974
181M
  Known = KF(Known2, Known, ShAmtNonZero);
975
181M
}
976
977
/// Combine the known bits of the two operands of an and/or/xor \p I, applying
/// idiom-specific refinements (blsi/blsmsk-style patterns and x-with-x+odd).
static KnownBits
getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
                         const KnownBits &KnownLHS, const KnownBits &KnownRHS,
                         const SimplifyQuery &Q, unsigned Depth) {
  unsigned BitWidth = KnownLHS.getBitWidth();
  KnownBits KnownOut(BitWidth);
  bool IsAnd = false;
  bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
  Value *X = nullptr, *Y = nullptr;

  switch (I->getOpcode()) {
  case Instruction::And:
    KnownOut = KnownLHS & KnownRHS;
    IsAnd = true;
    // and(x, -x) is common idioms that will clear all but lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: instcombine often reassociates independent `and` which can hide
    // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
    if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
      // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
      if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
        KnownOut = KnownLHS.blsi();
      else
        KnownOut = KnownRHS.blsi();
    }
    break;
  case Instruction::Or:
    KnownOut = KnownLHS | KnownRHS;
    break;
  case Instruction::Xor:
    KnownOut = KnownLHS ^ KnownRHS;
    // xor(x, x-1) is common idioms that will clear all but lowest set
    // bit. If we have a single known bit in x, we can clear all bits
    // above it.
    // TODO: xor(x, x-1) is often rewritting as xor(x, x-C) where C !=
    // -1 but for the purpose of demanded bits (xor(x, x-C) &
    // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
    // to use arbitrary C if xor(x, x-C) as the same as xor(x, x-1).
    if (HasKnownOne &&
        match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
      const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
      KnownOut = XBits.blsmsk();
    }
    break;
  default:
    llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
  }

  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
  // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
  // here we handle the more general case of adding any odd number by
  // matching the form and/xor/or(x, add(x, y)) where y is odd.
  // TODO: This could be generalized to clearing any bit set in y where the
  // following bit is known to be unset in y.
  if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
      (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
       match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
    KnownBits KnownY(BitWidth);
    computeKnownBits(Y, DemandedElts, KnownY, Q, Depth + 1);
    if (KnownY.countMinTrailingOnes() > 0) {
      if (IsAnd)
        KnownOut.Zero.setBit(0);
      else
        KnownOut.One.setBit(0);
    }
  }
  return KnownOut;
}
1047
1048
/// Compute known bits for a horizontal (pairwise) vector operation \p I by
/// splitting the demanded elements across the two operands and combining
/// adjacent lanes via \p KnownBitsFunc.
static KnownBits computeKnownBitsForHorizontalOperation(
    const Operator *I, const APInt &DemandedElts, const SimplifyQuery &Q,
    unsigned Depth,
    const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
        KnownBitsFunc) {
  APInt DemandedEltsLHS, DemandedEltsRHS;
  getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
                                      DemandedElts, DemandedEltsLHS,
                                      DemandedEltsRHS);

  // Combine each even lane with the following odd lane of the same operand.
  const auto ComputeForSingleOpFunc =
      [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
        return KnownBitsFunc(
            computeKnownBits(Op, DemandedEltsOp, Q, Depth + 1),
            computeKnownBits(Op, DemandedEltsOp << 1, Q, Depth + 1));
      };

  // If only one operand contributes demanded lanes, skip the other entirely.
  if (DemandedEltsRHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
  if (DemandedEltsLHS.isZero())
    return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);

  return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
      .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
}
1073
1074
// Public so this can be used in `SimplifyDemandedUseBits`.
1075
KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1076
                                             const KnownBits &KnownLHS,
1077
                                             const KnownBits &KnownRHS,
1078
                                             const SimplifyQuery &SQ,
1079
91.0M
                                             unsigned Depth) {
1080
91.0M
  auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1081
91.0M
  APInt DemandedElts =
1082
91.0M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())337k
:
APInt(1, 1)90.6M
;
1083
1084
91.0M
  return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, SQ,
1085
91.0M
                                  Depth);
1086
91.0M
}
1087
1088
0
/// Return the range of possible vscale values for function \p F, derived from
/// its vscale_range attribute, as a \p BitWidth-wide ConstantRange.
ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
  Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
  // Without vscale_range, we only know that vscale is non-zero.
  if (!Attr.isValid())
    return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));

  unsigned AttrMin = Attr.getVScaleRangeMin();
  // Minimum is larger than vscale width, result is always poison.
  if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
    return ConstantRange::getEmpty(BitWidth);

  APInt Min(BitWidth, AttrMin);
  std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
  // No (representable) upper bound: range is [Min, 0) i.e. wrapping to max.
  if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
    return ConstantRange(Min, APInt::getZero(BitWidth));

  return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}
1106
1107
void llvm::adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1108
                                       Value *Arm, bool Invert,
1109
88.1M
                                       const SimplifyQuery &Q, unsigned Depth) {
1110
  // If we have a constant arm, we are done.
1111
88.1M
  if (Known.isConstant())
1112
30.4M
    return;
1113
1114
  // See what condition implies about the bits of the select arm.
1115
57.6M
  KnownBits CondRes(Known.getBitWidth());
1116
57.6M
  computeKnownBitsFromCond(Arm, Cond, CondRes, Q, Invert, Depth + 1);
1117
  // If we don't get any information from the condition, no reason to
1118
  // proceed.
1119
57.6M
  if (CondRes.isUnknown())
1120
55.3M
    return;
1121
1122
  // We can have conflict if the condition is dead. I.e if we have
1123
  // (x | 64) < 32 ? (x | 64) : y
1124
  // we will have conflict at bit 6 from the condition/the `or`.
1125
  // In that case just return. Its not particularly important
1126
  // what we do, as this select is going to be simplified soon.
1127
2.32M
  CondRes = CondRes.unionWith(Known);
1128
2.32M
  if (CondRes.hasConflict())
1129
575
    return;
1130
1131
  // Finally make sure the information we found is valid. This is relatively
1132
  // expensive so it's left for the very end.
1133
2.32M
  if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1134
869k
    return;
1135
1136
  // Finally, we know we get information from the condition and its valid,
1137
  // so return it.
1138
1.45M
  Known = CondRes;
1139
1.45M
}
1140
1141
// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
1142
// Returns the input and lower/upper bounds.
1143
static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
1144
3.88M
                                const APInt *&CLow, const APInt *&CHigh) {
1145
3.88M
  assert(isa<Operator>(Select) &&
1146
3.88M
         cast<Operator>(Select)->getOpcode() == Instruction::Select &&
1147
3.88M
         "Input should be a Select!");
1148
1149
3.88M
  const Value *LHS = nullptr, *RHS = nullptr;
1150
3.88M
  SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
1151
3.88M
  if (SPF != SPF_SMAX && 
SPF != SPF_SMIN3.88M
)
1152
3.87M
    return false;
1153
1154
1.57k
  if (!match(RHS, m_APInt(CLow)))
1155
1.19k
    return false;
1156
1157
377
  const Value *LHS2 = nullptr, *RHS2 = nullptr;
1158
377
  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
1159
377
  if (getInverseMinMaxFlavor(SPF) != SPF2)
1160
371
    return false;
1161
1162
6
  if (!match(RHS2, m_APInt(CHigh)))
1163
0
    return false;
1164
1165
6
  if (SPF == SPF_SMIN)
1166
1
    std::swap(CLow, CHigh);
1167
1168
6
  In = LHS2;
1169
6
  return CLow->sle(*CHigh);
1170
6
}
1171
1172
static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
1173
                                         const APInt *&CLow,
1174
13.8M
                                         const APInt *&CHigh) {
1175
13.8M
  assert((II->getIntrinsicID() == Intrinsic::smin ||
1176
13.8M
          II->getIntrinsicID() == Intrinsic::smax) &&
1177
13.8M
         "Must be smin/smax");
1178
1179
13.8M
  Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
1180
13.8M
  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1181
13.8M
  if (!InnerII || 
InnerII->getIntrinsicID() != InverseID1.81M
||
1182
13.8M
      
!match(II->getArgOperand(1), m_APInt(CLow))1.42M
||
1183
13.8M
      
!match(InnerII->getArgOperand(1), m_APInt(CHigh))1.37M
)
1184
12.5M
    return false;
1185
1186
1.35M
  if (II->getIntrinsicID() == Intrinsic::smin)
1187
1.35M
    std::swap(CLow, CHigh);
1188
1.35M
  return CLow->sle(*CHigh);
1189
13.8M
}
1190
1191
static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
1192
13.3M
                                          KnownBits &Known) {
1193
13.3M
  const APInt *CLow, *CHigh;
1194
13.3M
  if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
1195
1.30M
    Known = Known.unionWith(
1196
1.30M
        ConstantRange::getNonEmpty(*CLow, *CHigh + 1).toKnownBits());
1197
13.3M
}
1198
1199
static void computeKnownBitsFromOperator(const Operator *I,
1200
                                         const APInt &DemandedElts,
1201
                                         KnownBits &Known,
1202
                                         const SimplifyQuery &Q,
1203
3.32G
                                         unsigned Depth) {
1204
3.32G
  unsigned BitWidth = Known.getBitWidth();
1205
1206
3.32G
  KnownBits Known2(BitWidth);
1207
3.32G
  switch (I->getOpcode()) {
1208
170M
  default: break;
1209
903M
  case Instruction::Load:
1210
903M
    if (MDNode *MD =
1211
903M
            Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1212
42.5M
      computeKnownBitsFromRangeMetadata(*MD, Known);
1213
903M
    break;
1214
118M
  case Instruction::And:
1215
118M
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Q, Depth + 1);
1216
118M
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1217
1218
118M
    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Q, Depth);
1219
118M
    break;
1220
48.0M
  case Instruction::Or:
1221
48.0M
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Q, Depth + 1);
1222
48.0M
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1223
1224
48.0M
    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Q, Depth);
1225
48.0M
    break;
1226
56.2M
  case Instruction::Xor:
1227
56.2M
    computeKnownBits(I->getOperand(1), DemandedElts, Known, Q, Depth + 1);
1228
56.2M
    computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1229
1230
56.2M
    Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Q, Depth);
1231
56.2M
    break;
1232
44.0M
  case Instruction::Mul: {
1233
44.0M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1234
44.0M
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1235
44.0M
    computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, NUW,
1236
44.0M
                        DemandedElts, Known, Known2, Q, Depth);
1237
44.0M
    break;
1238
0
  }
1239
16.4M
  case Instruction::UDiv: {
1240
16.4M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1241
16.4M
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1242
16.4M
    Known =
1243
16.4M
        KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1244
16.4M
    break;
1245
0
  }
1246
19.6M
  case Instruction::SDiv: {
1247
19.6M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1248
19.6M
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1249
19.6M
    Known =
1250
19.6M
        KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1251
19.6M
    break;
1252
0
  }
1253
42.5M
  case Instruction::Select: {
1254
85.1M
    auto ComputeForArm = [&](Value *Arm, bool Invert) {
1255
85.1M
      KnownBits Res(Known.getBitWidth());
1256
85.1M
      computeKnownBits(Arm, DemandedElts, Res, Q, Depth + 1);
1257
85.1M
      adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Q, Depth);
1258
85.1M
      return Res;
1259
85.1M
    };
1260
    // Only known if known in both the LHS and RHS.
1261
42.5M
    Known =
1262
42.5M
        ComputeForArm(I->getOperand(1), /*Invert=*/false)
1263
42.5M
            .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1264
42.5M
    break;
1265
0
  }
1266
0
  case Instruction::FPTrunc:
1267
0
  case Instruction::FPExt:
1268
702k
  case Instruction::FPToUI:
1269
2.97M
  case Instruction::FPToSI:
1270
2.97M
  case Instruction::SIToFP:
1271
2.97M
  case Instruction::UIToFP:
1272
2.97M
    break; // Can't work with floating point.
1273
219M
  case Instruction::PtrToInt:
1274
222M
  case Instruction::IntToPtr:
1275
    // Fall through and handle them the same as zext/trunc.
1276
222M
    [[fallthrough]];
1277
337M
  case Instruction::ZExt:
1278
385M
  case Instruction::Trunc: {
1279
385M
    Type *SrcTy = I->getOperand(0)->getType();
1280
1281
385M
    unsigned SrcBitWidth;
1282
    // Note that we handle pointer operands here because of inttoptr/ptrtoint
1283
    // which fall through here.
1284
385M
    Type *ScalarTy = SrcTy->getScalarType();
1285
385M
    SrcBitWidth = ScalarTy->isPointerTy() ?
1286
219M
      Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1287
385M
      
Q.DL.getTypeSizeInBits(ScalarTy)165M
;
1288
1289
385M
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
1290
385M
    Known = Known.anyextOrTrunc(SrcBitWidth);
1291
385M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1292
385M
    if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1293
385M
        Inst && 
Inst->hasNonNeg()114M
&&
!Known.isNegative()40.2M
)
1294
40.2M
      Known.makeNonNegative();
1295
385M
    Known = Known.zextOrTrunc(BitWidth);
1296
385M
    break;
1297
337M
  }
1298
21.2M
  case Instruction::BitCast: {
1299
21.2M
    Type *SrcTy = I->getOperand(0)->getType();
1300
21.2M
    if (SrcTy->isIntOrPtrTy() &&
1301
        // TODO: For now, not handling conversions like:
1302
        // (bitcast i64 %x to <2 x i32>)
1303
21.2M
        
!I->getType()->isVectorTy()18.4k
) {
1304
331
      computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
1305
331
      break;
1306
331
    }
1307
1308
21.2M
    const Value *V;
1309
    // Handle bitcast from floating point to integer.
1310
21.2M
    if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1311
21.2M
        
V->getType()->isFPOrFPVectorTy()4.11M
) {
1312
4.11M
      Type *FPType = V->getType()->getScalarType();
1313
4.11M
      KnownFPClass Result =
1314
4.11M
          computeKnownFPClass(V, DemandedElts, fcAllFlags, Q, Depth + 1);
1315
4.11M
      FPClassTest FPClasses = Result.KnownFPClasses;
1316
1317
      // TODO: Treat it as zero/poison if the use of I is unreachable.
1318
4.11M
      if (FPClasses == fcNone)
1319
0
        break;
1320
1321
4.11M
      if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1322
4.24k
        Known.Zero.setAllBits();
1323
4.24k
        Known.One.setAllBits();
1324
1325
4.24k
        if (FPClasses & fcInf)
1326
777
          Known = Known.intersectWith(KnownBits::makeConstant(
1327
777
              APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1328
1329
4.24k
        if (FPClasses & fcZero)
1330
3.46k
          Known = Known.intersectWith(KnownBits::makeConstant(
1331
3.46k
              APInt::getZero(FPType->getScalarSizeInBits())));
1332
1333
4.24k
        Known.Zero.clearSignBit();
1334
4.24k
        Known.One.clearSignBit();
1335
4.24k
      }
1336
1337
4.11M
      if (Result.SignBit) {
1338
648k
        if (*Result.SignBit)
1339
2.99k
          Known.makeNegative();
1340
645k
        else
1341
645k
          Known.makeNonNegative();
1342
648k
      }
1343
1344
4.11M
      break;
1345
4.11M
    }
1346
1347
    // Handle cast from vector integer type to scalar or vector integer.
1348
17.1M
    auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1349
17.1M
    if (!SrcVecTy || 
!SrcVecTy->getElementType()->isIntegerTy()17.1M
||
1350
17.1M
        
!I->getType()->isIntOrIntVectorTy()17.0M
||
1351
17.1M
        
isa<ScalableVectorType>(I->getType())17.0M
)
1352
60.0k
      break;
1353
1354
    // Look through a cast from narrow vector elements to wider type.
1355
    // Examples: v4i32 -> v2i64, v3i8 -> v24
1356
17.0M
    unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1357
17.0M
    if (BitWidth % SubBitWidth == 0) {
1358
      // Known bits are automatically intersected across demanded elements of a
1359
      // vector. So for example, if a bit is computed as known zero, it must be
1360
      // zero across all demanded elements of the vector.
1361
      //
1362
      // For this bitcast, each demanded element of the output is sub-divided
1363
      // across a set of smaller vector elements in the source vector. To get
1364
      // the known bits for an entire element of the output, compute the known
1365
      // bits for each sub-element sequentially. This is done by shifting the
1366
      // one-set-bit demanded elements parameter across the sub-elements for
1367
      // consecutive calls to computeKnownBits. We are using the demanded
1368
      // elements parameter as a mask operator.
1369
      //
1370
      // The known bits of each sub-element are then inserted into place
1371
      // (dependent on endian) to form the full result of known bits.
1372
16.1M
      unsigned NumElts = DemandedElts.getBitWidth();
1373
16.1M
      unsigned SubScale = BitWidth / SubBitWidth;
1374
16.1M
      APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1375
33.4M
      for (unsigned i = 0; i != NumElts; 
++i17.2M
) {
1376
17.2M
        if (DemandedElts[i])
1377
17.0M
          SubDemandedElts.setBit(i * SubScale);
1378
17.2M
      }
1379
1380
16.1M
      KnownBits KnownSrc(SubBitWidth);
1381
270M
      for (unsigned i = 0; i != SubScale; 
++i254M
) {
1382
254M
        computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
1383
254M
                         Depth + 1);
1384
254M
        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : 
SubScale - 1 - i0
;
1385
254M
        Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1386
254M
      }
1387
16.1M
    }
1388
17.0M
    break;
1389
17.1M
  }
1390
29.2M
  case Instruction::SExt: {
1391
    // Compute the bits in the result that are not present in the input.
1392
29.2M
    unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1393
1394
29.2M
    Known = Known.trunc(SrcBitWidth);
1395
29.2M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1396
    // If the sign bit of the input is known set or clear, then we know the
1397
    // top bits of the result.
1398
29.2M
    Known = Known.sext(BitWidth);
1399
29.2M
    break;
1400
17.1M
  }
1401
63.3M
  case Instruction::Shl: {
1402
63.3M
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1403
63.3M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1404
63.3M
    auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1405
63.3M
                         bool ShAmtNonZero) {
1406
63.3M
      return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1407
63.3M
    };
1408
63.3M
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1409
63.3M
                                      KF);
1410
    // Trailing zeros of a right-shifted constant never decrease.
1411
63.3M
    const APInt *C;
1412
63.3M
    if (match(I->getOperand(0), m_APInt(C)))
1413
10.8M
      Known.Zero.setLowBits(C->countr_zero());
1414
63.3M
    break;
1415
17.1M
  }
1416
76.3M
  case Instruction::LShr: {
1417
76.3M
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1418
76.3M
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1419
76.3M
                      bool ShAmtNonZero) {
1420
76.3M
      return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1421
76.3M
    };
1422
76.3M
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1423
76.3M
                                      KF);
1424
    // Leading zeros of a left-shifted constant never decrease.
1425
76.3M
    const APInt *C;
1426
76.3M
    if (match(I->getOperand(0), m_APInt(C)))
1427
1.06M
      Known.Zero.setHighBits(C->countl_zero());
1428
76.3M
    break;
1429
17.1M
  }
1430
42.1M
  case Instruction::AShr: {
1431
42.1M
    bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1432
42.1M
    auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1433
42.1M
                      bool ShAmtNonZero) {
1434
42.1M
      return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1435
42.1M
    };
1436
42.1M
    computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Q, Depth,
1437
42.1M
                                      KF);
1438
42.1M
    break;
1439
17.1M
  }
1440
129M
  case Instruction::Sub: {
1441
129M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1442
129M
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1443
129M
    computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1444
129M
                           DemandedElts, Known, Known2, Q, Depth);
1445
129M
    break;
1446
17.1M
  }
1447
254M
  case Instruction::Add: {
1448
254M
    bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1449
254M
    bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1450
254M
    computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1451
254M
                           DemandedElts, Known, Known2, Q, Depth);
1452
254M
    break;
1453
17.1M
  }
1454
2.78M
  case Instruction::SRem:
1455
2.78M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1456
2.78M
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1457
2.78M
    Known = KnownBits::srem(Known, Known2);
1458
2.78M
    break;
1459
1460
4.06M
  case Instruction::URem:
1461
4.06M
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1462
4.06M
    computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1463
4.06M
    Known = KnownBits::urem(Known, Known2);
1464
4.06M
    break;
1465
93.1M
  case Instruction::Alloca:
1466
93.1M
    Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1467
93.1M
    break;
1468
197M
  case Instruction::GetElementPtr: {
1469
    // Analyze all of the subscripts of this getelementptr instruction
1470
    // to determine if we can prove known low zero bits.
1471
197M
    computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
1472
    // Accumulate the constant indices in a separate variable
1473
    // to minimize the number of calls to computeForAddSub.
1474
197M
    unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(I->getType());
1475
197M
    APInt AccConstIndices(IndexWidth, 0);
1476
1477
197M
    auto AddIndexToKnown = [&](KnownBits IndexBits) {
1478
76.4M
      if (IndexWidth == BitWidth) {
1479
        // Note that inbounds does *not* guarantee nsw for the addition, as only
1480
        // the offset is signed, while the base address is unsigned.
1481
76.4M
        Known = KnownBits::add(Known, IndexBits);
1482
76.4M
      } else {
1483
        // If the index width is smaller than the pointer width, only add the
1484
        // value to the low bits.
1485
0
        assert(IndexWidth < BitWidth &&
1486
0
               "Index width can't be larger than pointer width");
1487
0
        Known.insertBits(KnownBits::add(Known.trunc(IndexWidth), IndexBits), 0);
1488
0
      }
1489
76.4M
    };
1490
1491
197M
    gep_type_iterator GTI = gep_type_begin(I);
1492
274M
    for (unsigned i = 1, e = I->getNumOperands(); i != e; 
++i, ++GTI77.5M
) {
1493
      // TrailZ can only become smaller, short-circuit if we hit zero.
1494
198M
      if (Known.isUnknown())
1495
120M
        break;
1496
1497
77.5M
      Value *Index = I->getOperand(i);
1498
1499
      // Handle case when index is zero.
1500
77.5M
      Constant *CIndex = dyn_cast<Constant>(Index);
1501
77.5M
      if (CIndex && 
CIndex->isZeroValue()74.2M
)
1502
1.08M
        continue;
1503
1504
76.4M
      if (StructType *STy = GTI.getStructTypeOrNull()) {
1505
        // Handle struct member offset arithmetic.
1506
1507
21.2k
        assert(CIndex &&
1508
21.2k
               "Access to structure field must be known at compile time");
1509
1510
21.2k
        if (CIndex->getType()->isVectorTy())
1511
0
          Index = CIndex->getSplatValue();
1512
1513
21.2k
        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1514
21.2k
        const StructLayout *SL = Q.DL.getStructLayout(STy);
1515
21.2k
        uint64_t Offset = SL->getElementOffset(Idx);
1516
21.2k
        AccConstIndices += Offset;
1517
21.2k
        continue;
1518
21.2k
      }
1519
1520
      // Handle array index arithmetic.
1521
76.4M
      Type *IndexedTy = GTI.getIndexedType();
1522
76.4M
      if (!IndexedTy->isSized()) {
1523
0
        Known.resetAll();
1524
0
        break;
1525
0
      }
1526
1527
76.4M
      TypeSize Stride = GTI.getSequentialElementStride(Q.DL);
1528
76.4M
      uint64_t StrideInBytes = Stride.getKnownMinValue();
1529
76.4M
      if (!Stride.isScalable()) {
1530
        // Fast path for constant offset.
1531
76.4M
        if (auto *CI = dyn_cast<ConstantInt>(Index)) {
1532
73.1M
          AccConstIndices +=
1533
73.1M
              CI->getValue().sextOrTrunc(IndexWidth) * StrideInBytes;
1534
73.1M
          continue;
1535
73.1M
        }
1536
76.4M
      }
1537
1538
3.32M
      KnownBits IndexBits =
1539
3.32M
          computeKnownBits(Index, Q, Depth + 1).sextOrTrunc(IndexWidth);
1540
3.32M
      KnownBits ScalingFactor(IndexWidth);
1541
      // Multiply by current sizeof type.
1542
      // &A[i] == A + i * sizeof(*A[i]).
1543
3.32M
      if (Stride.isScalable()) {
1544
        // For scalable types the only thing we know about sizeof is
1545
        // that this is a multiple of the minimum size.
1546
0
        ScalingFactor.Zero.setLowBits(llvm::countr_zero(StrideInBytes));
1547
3.32M
      } else {
1548
3.32M
        ScalingFactor =
1549
3.32M
            KnownBits::makeConstant(APInt(IndexWidth, StrideInBytes));
1550
3.32M
      }
1551
3.32M
      AddIndexToKnown(KnownBits::mul(IndexBits, ScalingFactor));
1552
3.32M
    }
1553
197M
    if (!Known.isUnknown() && 
!AccConstIndices.isZero()75.4M
)
1554
73.1M
      AddIndexToKnown(KnownBits::makeConstant(AccConstIndices));
1555
197M
    break;
1556
17.1M
  }
1557
399M
  case Instruction::PHI: {
1558
399M
    const PHINode *P = cast<PHINode>(I);
1559
399M
    BinaryOperator *BO = nullptr;
1560
399M
    Value *R = nullptr, *L = nullptr;
1561
399M
    if (matchSimpleRecurrence(P, BO, R, L)) {
1562
      // Handle the case of a simple two-predecessor recurrence PHI.
1563
      // There's a lot more that could theoretically be done here, but
1564
      // this is sufficient to catch some interesting cases.
1565
130M
      unsigned Opcode = BO->getOpcode();
1566
1567
130M
      switch (Opcode) {
1568
      // If this is a shift recurrence, we know the bits being shifted in. We
1569
      // can combine that with information about the start value of the
1570
      // recurrence to conclude facts about the result. If this is a udiv
1571
      // recurrence, we know that the result can never exceed either the
1572
      // numerator or the start value, whichever is greater.
1573
5.07M
      case Instruction::LShr:
1574
5.19M
      case Instruction::AShr:
1575
6.50M
      case Instruction::Shl:
1576
13.0M
      case Instruction::UDiv:
1577
13.0M
        if (BO->getOperand(0) != I)
1578
0
          break;
1579
13.0M
        [[fallthrough]];
1580
1581
      // For a urem recurrence, the result can never exceed the start value. The
1582
      // phi could either be the numerator or the denominator.
1583
13.1M
      case Instruction::URem: {
1584
        // We have matched a recurrence of the form:
1585
        // %iv = [R, %entry], [%iv.next, %backedge]
1586
        // %iv.next = shift_op %iv, L
1587
1588
        // Recurse with the phi context to avoid concern about whether facts
1589
        // inferred hold at original context instruction.  TODO: It may be
1590
        // correct to use the original context.  IF warranted, explore and
1591
        // add sufficient tests to cover.
1592
13.1M
        SimplifyQuery RecQ = Q.getWithoutCondContext();
1593
13.1M
        RecQ.CxtI = P;
1594
13.1M
        computeKnownBits(R, DemandedElts, Known2, RecQ, Depth + 1);
1595
13.1M
        switch (Opcode) {
1596
1.31M
        case Instruction::Shl:
1597
          // A shl recurrence will only increase the tailing zeros
1598
1.31M
          Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1599
1.31M
          break;
1600
5.07M
        case Instruction::LShr:
1601
11.6M
        case Instruction::UDiv:
1602
11.6M
        case Instruction::URem:
1603
          // lshr, udiv, and urem recurrences will preserve the leading zeros of
1604
          // the start value.
1605
11.6M
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1606
11.6M
          break;
1607
124k
        case Instruction::AShr:
1608
          // An ashr recurrence will extend the initial sign bit
1609
124k
          Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1610
124k
          Known.One.setHighBits(Known2.countMinLeadingOnes());
1611
124k
          break;
1612
13.1M
        }
1613
13.1M
        break;
1614
13.1M
      }
1615
1616
      // Check for operations that have the property that if
1617
      // both their operands have low zero bits, the result
1618
      // will have low zero bits.
1619
98.6M
      case Instruction::Add:
1620
102M
      case Instruction::Sub:
1621
115M
      case Instruction::And:
1622
116M
      case Instruction::Or:
1623
117M
      case Instruction::Mul: {
1624
        // Change the context instruction to the "edge" that flows into the
1625
        // phi. This is important because that is where the value is actually
1626
        // "evaluated" even though it is used later somewhere else. (see also
1627
        // D69571).
1628
117M
        SimplifyQuery RecQ = Q.getWithoutCondContext();
1629
1630
117M
        unsigned OpNum = P->getOperand(0) == R ? 
062.3M
:
154.7M
;
1631
117M
        Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1632
117M
        Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1633
1634
        // Ok, we have a PHI of the form L op= R. Check for low
1635
        // zero bits.
1636
117M
        RecQ.CxtI = RInst;
1637
117M
        computeKnownBits(R, DemandedElts, Known2, RecQ, Depth + 1);
1638
1639
        // We need to take the minimum number of known bits
1640
117M
        KnownBits Known3(BitWidth);
1641
117M
        RecQ.CxtI = LInst;
1642
117M
        computeKnownBits(L, DemandedElts, Known3, RecQ, Depth + 1);
1643
1644
117M
        Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1645
117M
                                       Known3.countMinTrailingZeros()));
1646
1647
117M
        auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1648
117M
        if (!OverflowOp || 
!Q.IIQ.hasNoSignedWrap(OverflowOp)103M
)
1649
64.6M
          break;
1650
1651
52.4M
        switch (Opcode) {
1652
        // If initial value of recurrence is nonnegative, and we are adding
1653
        // a nonnegative number with nsw, the result can only be nonnegative
1654
        // or poison value regardless of the number of times we execute the
1655
        // add in phi recurrence. If initial value is negative and we are
1656
        // adding a negative number with nsw, the result can only be
1657
        // negative or poison value. Similar arguments apply to sub and mul.
1658
        //
1659
        // (add non-negative, non-negative) --> non-negative
1660
        // (add negative, negative) --> negative
1661
50.7M
        case Instruction::Add: {
1662
50.7M
          if (Known2.isNonNegative() && 
Known3.isNonNegative()39.9M
)
1663
35.7M
            Known.makeNonNegative();
1664
15.0M
          else if (Known2.isNegative() && 
Known3.isNegative()251k
)
1665
16.7k
            Known.makeNegative();
1666
50.7M
          break;
1667
0
        }
1668
1669
        // (sub nsw non-negative, negative) --> non-negative
1670
        // (sub nsw negative, non-negative) --> negative
1671
1.59M
        case Instruction::Sub: {
1672
1.59M
          if (BO->getOperand(0) != I)
1673
19.4k
            break;
1674
1.57M
          if (Known2.isNonNegative() && 
Known3.isNegative()457k
)
1675
0
            Known.makeNonNegative();
1676
1.57M
          else if (Known2.isNegative() && 
Known3.isNonNegative()483
)
1677
139
            Known.makeNegative();
1678
1.57M
          break;
1679
1.59M
        }
1680
1681
        // (mul nsw non-negative, non-negative) --> non-negative
1682
45.2k
        case Instruction::Mul:
1683
45.2k
          if (Known2.isNonNegative() && 
Known3.isNonNegative()37.1k
)
1684
10.7k
            Known.makeNonNegative();
1685
45.2k
          break;
1686
1687
0
        default:
1688
0
          break;
1689
52.4M
        }
1690
52.4M
        break;
1691
52.4M
      }
1692
1693
52.4M
      default:
1694
544k
        break;
1695
130M
      }
1696
130M
    }
1697
1698
    // Unreachable blocks may have zero-operand PHI nodes.
1699
399M
    if (P->getNumIncomingValues() == 0)
1700
0
      break;
1701
1702
    // Otherwise take the unions of the known bit sets of the operands,
1703
    // taking conservative care to avoid excessive recursion.
1704
399M
    if (Depth < MaxAnalysisRecursionDepth - 1 && 
Known.isUnknown()335M
) {
1705
      // Skip if every incoming value references to ourself.
1706
298M
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1707
245
        break;
1708
1709
298M
      Known.Zero.setAllBits();
1710
298M
      Known.One.setAllBits();
1711
393M
      for (const Use &U : P->operands()) {
1712
393M
        Value *IncValue;
1713
393M
        const PHINode *CxtPhi;
1714
393M
        Instruction *CxtI;
1715
393M
        breakSelfRecursivePHI(&U, P, IncValue, CxtI, &CxtPhi);
1716
        // Skip direct self references.
1717
393M
        if (IncValue == P)
1718
401k
          continue;
1719
1720
        // Change the context instruction to the "edge" that flows into the
1721
        // phi. This is important because that is where the value is actually
1722
        // "evaluated" even though it is used later somewhere else. (see also
1723
        // D69571).
1724
393M
        SimplifyQuery RecQ = Q.getWithoutCondContext().getWithInstruction(CxtI);
1725
1726
393M
        Known2 = KnownBits(BitWidth);
1727
1728
        // Recurse, but cap the recursion to one level, because we don't
1729
        // want to waste time spinning around in loops.
1730
        // TODO: See if we can base recursion limiter on number of incoming phi
1731
        // edges so we don't overly clamp analysis.
1732
393M
        computeKnownBits(IncValue, DemandedElts, Known2, RecQ,
1733
393M
                         MaxAnalysisRecursionDepth - 1);
1734
1735
        // See if we can further use a conditional branch into the phi
1736
        // to help us determine the range of the value.
1737
393M
        if (!Known2.isConstant()) {
1738
331M
          CmpPredicate Pred;
1739
331M
          const APInt *RHSC;
1740
331M
          BasicBlock *TrueSucc, *FalseSucc;
1741
          // TODO: Use RHS Value and compute range from its known bits.
1742
331M
          if (match(RecQ.CxtI,
1743
331M
                    m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1744
331M
                         m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1745
            // Check for cases of duplicate successors.
1746
30.5M
            if ((TrueSucc == CxtPhi->getParent()) !=
1747
30.5M
                (FalseSucc == CxtPhi->getParent())) {
1748
              // If we're using the false successor, invert the predicate.
1749
30.5M
              if (FalseSucc == CxtPhi->getParent())
1750
23.1M
                Pred = CmpInst::getInversePredicate(Pred);
1751
              // Get the knownbits implied by the incoming phi condition.
1752
30.5M
              auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1753
30.5M
              KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1754
              // We can have conflicts here if we are analyzing deadcode (its
1755
              // impossible for us reach this BB based the icmp).
1756
30.5M
              if (KnownUnion.hasConflict()) {
1757
                // No reason to continue analyzing in a known dead region, so
1758
                // just resetAll and break. This will cause us to also exit the
1759
                // outer loop.
1760
76
                Known.resetAll();
1761
76
                break;
1762
76
              }
1763
30.5M
              Known2 = KnownUnion;
1764
30.5M
            }
1765
30.5M
          }
1766
331M
        }
1767
1768
393M
        Known = Known.intersectWith(Known2);
1769
        // If all bits have been ruled out, there's no need to check
1770
        // more operands.
1771
393M
        if (Known.isUnknown())
1772
281M
          break;
1773
393M
      }
1774
298M
    }
1775
399M
    break;
1776
399M
  }
1777
399M
  case Instruction::Call:
1778
172M
  case Instruction::Invoke: {
1779
    // If range metadata is attached to this call, set known bits from that,
1780
    // and then intersect with known bits based on other properties of the
1781
    // function.
1782
172M
    if (MDNode *MD =
1783
172M
            Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1784
613k
      computeKnownBitsFromRangeMetadata(*MD, Known);
1785
1786
172M
    const auto *CB = cast<CallBase>(I);
1787
1788
172M
    if (std::optional<ConstantRange> Range = CB->getRange())
1789
13.6M
      Known = Known.unionWith(Range->toKnownBits());
1790
1791
172M
    if (const Value *RV = CB->getReturnedArgOperand()) {
1792
203
      if (RV->getType() == I->getType()) {
1793
203
        computeKnownBits(RV, Known2, Q, Depth + 1);
1794
203
        Known = Known.unionWith(Known2);
1795
        // If the function doesn't return properly for all input values
1796
        // (e.g. unreachable exits) then there might be conflicts between the
1797
        // argument value and the range metadata. Simply discard the known bits
1798
        // in case of conflicts.
1799
203
        if (Known.hasConflict())
1800
0
          Known.resetAll();
1801
203
      }
1802
203
    }
1803
172M
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1804
77.9M
      switch (II->getIntrinsicID()) {
1805
2.24M
      default:
1806
2.24M
        break;
1807
2.95M
      case Intrinsic::abs: {
1808
2.95M
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1809
2.95M
        bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1810
2.95M
        Known = Known2.abs(IntMinIsPoison);
1811
2.95M
        break;
1812
0
      }
1813
21.3k
      case Intrinsic::bitreverse:
1814
21.3k
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1815
21.3k
        Known.Zero |= Known2.Zero.reverseBits();
1816
21.3k
        Known.One |= Known2.One.reverseBits();
1817
21.3k
        break;
1818
2.33M
      case Intrinsic::bswap:
1819
2.33M
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1820
2.33M
        Known.Zero |= Known2.Zero.byteSwap();
1821
2.33M
        Known.One |= Known2.One.byteSwap();
1822
2.33M
        break;
1823
3.76M
      case Intrinsic::ctlz: {
1824
3.76M
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1825
        // If we have a known 1, its position is our upper bound.
1826
3.76M
        unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1827
        // If this call is poison for 0 input, the result will be less than 2^n.
1828
3.76M
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1829
3.29M
          PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1830
3.76M
        unsigned LowBits = llvm::bit_width(PossibleLZ);
1831
3.76M
        Known.Zero.setBitsFrom(LowBits);
1832
3.76M
        break;
1833
0
      }
1834
4.69M
      case Intrinsic::cttz: {
1835
4.69M
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1836
        // If we have a known 1, its position is our upper bound.
1837
4.69M
        unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1838
        // If this call is poison for 0 input, the result will be less than 2^n.
1839
4.69M
        if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1840
4.53M
          PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1841
4.69M
        unsigned LowBits = llvm::bit_width(PossibleTZ);
1842
4.69M
        Known.Zero.setBitsFrom(LowBits);
1843
4.69M
        break;
1844
0
      }
1845
835k
      case Intrinsic::ctpop: {
1846
835k
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1847
        // We can bound the space the count needs.  Also, bits known to be zero
1848
        // can't contribute to the population.
1849
835k
        unsigned BitsPossiblySet = Known2.countMaxPopulation();
1850
835k
        unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1851
835k
        Known.Zero.setBitsFrom(LowBits);
1852
        // TODO: we could bound KnownOne using the lower bound on the number
1853
        // of bits which might be set provided by popcnt KnownOne2.
1854
835k
        break;
1855
0
      }
1856
290k
      case Intrinsic::fshr:
1857
15.5M
      case Intrinsic::fshl: {
1858
15.5M
        const APInt *SA;
1859
15.5M
        if (!match(I->getOperand(2), m_APInt(SA)))
1860
342k
          break;
1861
1862
        // Normalize to funnel shift left.
1863
15.2M
        uint64_t ShiftAmt = SA->urem(BitWidth);
1864
15.2M
        if (II->getIntrinsicID() == Intrinsic::fshr)
1865
2.68k
          ShiftAmt = BitWidth - ShiftAmt;
1866
1867
15.2M
        KnownBits Known3(BitWidth);
1868
15.2M
        computeKnownBits(I->getOperand(0), DemandedElts, Known2, Q, Depth + 1);
1869
15.2M
        computeKnownBits(I->getOperand(1), DemandedElts, Known3, Q, Depth + 1);
1870
1871
15.2M
        Known.Zero =
1872
15.2M
            Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1873
15.2M
        Known.One =
1874
15.2M
            Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1875
15.2M
        break;
1876
15.5M
      }
1877
161k
      case Intrinsic::uadd_sat:
1878
161k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1879
161k
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1880
161k
        Known = KnownBits::uadd_sat(Known, Known2);
1881
161k
        break;
1882
3.41M
      case Intrinsic::usub_sat:
1883
3.41M
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1884
3.41M
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1885
3.41M
        Known = KnownBits::usub_sat(Known, Known2);
1886
3.41M
        break;
1887
44.1k
      case Intrinsic::sadd_sat:
1888
44.1k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1889
44.1k
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1890
44.1k
        Known = KnownBits::sadd_sat(Known, Known2);
1891
44.1k
        break;
1892
11.4k
      case Intrinsic::ssub_sat:
1893
11.4k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1894
11.4k
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1895
11.4k
        Known = KnownBits::ssub_sat(Known, Known2);
1896
11.4k
        break;
1897
        // Vec reverse preserves bits from input vec.
1898
0
      case Intrinsic::vector_reverse:
1899
0
        computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known, Q,
1900
0
                         Depth + 1);
1901
0
        break;
1902
        // for min/max/and/or reduce, any bit common to each element in the
1903
        // input vec is set in the output.
1904
0
      case Intrinsic::vector_reduce_and:
1905
375
      case Intrinsic::vector_reduce_or:
1906
375
      case Intrinsic::vector_reduce_umax:
1907
375
      case Intrinsic::vector_reduce_umin:
1908
375
      case Intrinsic::vector_reduce_smax:
1909
375
      case Intrinsic::vector_reduce_smin:
1910
375
        computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
1911
375
        break;
1912
0
      case Intrinsic::vector_reduce_xor: {
1913
0
        computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
1914
        // The zeros common to all vecs are zero in the output.
1915
        // If the number of elements is odd, then the common ones remain. If the
1916
        // number of elements is even, then the common ones becomes zeros.
1917
0
        auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1918
        // Even, so the ones become zeros.
1919
0
        bool EvenCnt = VecTy->getElementCount().isKnownEven();
1920
0
        if (EvenCnt)
1921
0
          Known.Zero |= Known.One;
1922
        // Maybe even element count so need to clear ones.
1923
0
        if (VecTy->isScalableTy() || EvenCnt)
1924
0
          Known.One.clearAllBits();
1925
0
        break;
1926
375
      }
1927
12.2M
      case Intrinsic::umin:
1928
12.2M
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1929
12.2M
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1930
12.2M
        Known = KnownBits::umin(Known, Known2);
1931
12.2M
        break;
1932
16.2M
      case Intrinsic::umax:
1933
16.2M
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1934
16.2M
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1935
16.2M
        Known = KnownBits::umax(Known, Known2);
1936
16.2M
        break;
1937
9.63M
      case Intrinsic::smin:
1938
9.63M
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1939
9.63M
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1940
9.63M
        Known = KnownBits::smin(Known, Known2);
1941
9.63M
        unionWithMinMaxIntrinsicClamp(II, Known);
1942
9.63M
        break;
1943
3.69M
      case Intrinsic::smax:
1944
3.69M
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1945
3.69M
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1946
3.69M
        Known = KnownBits::smax(Known, Known2);
1947
3.69M
        unionWithMinMaxIntrinsicClamp(II, Known);
1948
3.69M
        break;
1949
11.5k
      case Intrinsic::ptrmask: {
1950
11.5k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1951
1952
11.5k
        const Value *Mask = I->getOperand(1);
1953
11.5k
        Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1954
11.5k
        computeKnownBits(Mask, DemandedElts, Known2, Q, Depth + 1);
1955
        // TODO: 1-extend would be more precise.
1956
11.5k
        Known &= Known2.anyextOrTrunc(BitWidth);
1957
11.5k
        break;
1958
375
      }
1959
26.6k
      case Intrinsic::x86_sse2_pmulh_w:
1960
26.6k
      case Intrinsic::x86_avx2_pmulh_w:
1961
26.6k
      case Intrinsic::x86_avx512_pmulh_w_512:
1962
26.6k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1963
26.6k
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1964
26.6k
        Known = KnownBits::mulhs(Known, Known2);
1965
26.6k
        break;
1966
18.7k
      case Intrinsic::x86_sse2_pmulhu_w:
1967
18.8k
      case Intrinsic::x86_avx2_pmulhu_w:
1968
18.8k
      case Intrinsic::x86_avx512_pmulhu_w_512:
1969
18.8k
        computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
1970
18.8k
        computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
1971
18.8k
        Known = KnownBits::mulhu(Known, Known2);
1972
18.8k
        break;
1973
53.1k
      case Intrinsic::x86_sse42_crc32_64_64:
1974
53.1k
        Known.Zero.setBitsFrom(32);
1975
53.1k
        break;
1976
2.26k
      case Intrinsic::x86_ssse3_phadd_d_128:
1977
2.26k
      case Intrinsic::x86_ssse3_phadd_w_128:
1978
5.71k
      case Intrinsic::x86_avx2_phadd_d:
1979
5.71k
      case Intrinsic::x86_avx2_phadd_w: {
1980
5.71k
        Known = computeKnownBitsForHorizontalOperation(
1981
5.71k
            I, DemandedElts, Q, Depth,
1982
9.26k
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1983
9.26k
              return KnownBits::add(KnownLHS, KnownRHS);
1984
9.26k
            });
1985
5.71k
        break;
1986
5.71k
      }
1987
0
      case Intrinsic::x86_ssse3_phadd_sw_128:
1988
0
      case Intrinsic::x86_avx2_phadd_sw: {
1989
0
        Known = computeKnownBitsForHorizontalOperation(
1990
0
            I, DemandedElts, Q, Depth, KnownBits::sadd_sat);
1991
0
        break;
1992
0
      }
1993
0
      case Intrinsic::x86_ssse3_phsub_d_128:
1994
0
      case Intrinsic::x86_ssse3_phsub_w_128:
1995
0
      case Intrinsic::x86_avx2_phsub_d:
1996
0
      case Intrinsic::x86_avx2_phsub_w: {
1997
0
        Known = computeKnownBitsForHorizontalOperation(
1998
0
            I, DemandedElts, Q, Depth,
1999
0
            [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
2000
0
              return KnownBits::sub(KnownLHS, KnownRHS);
2001
0
            });
2002
0
        break;
2003
0
      }
2004
0
      case Intrinsic::x86_ssse3_phsub_sw_128:
2005
0
      case Intrinsic::x86_avx2_phsub_sw: {
2006
0
        Known = computeKnownBitsForHorizontalOperation(
2007
0
            I, DemandedElts, Q, Depth, KnownBits::ssub_sat);
2008
0
        break;
2009
0
      }
2010
0
      case Intrinsic::riscv_vsetvli:
2011
0
      case Intrinsic::riscv_vsetvlimax: {
2012
0
        bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
2013
0
        const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
2014
0
        uint64_t SEW = RISCVVType::decodeVSEW(
2015
0
            cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
2016
0
        RISCVVType::VLMUL VLMUL = static_cast<RISCVVType::VLMUL>(
2017
0
            cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
2018
0
        uint64_t MaxVLEN =
2019
0
            Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
2020
0
        uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
2021
2022
        // Result of vsetvli must be not larger than AVL.
2023
0
        if (HasAVL)
2024
0
          if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
2025
0
            MaxVL = std::min(MaxVL, CI->getZExtValue());
2026
2027
0
        unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
2028
0
        if (BitWidth > KnownZeroFirstBit)
2029
0
          Known.Zero.setBitsFrom(KnownZeroFirstBit);
2030
0
        break;
2031
0
      }
2032
0
      case Intrinsic::vscale: {
2033
0
        if (!II->getParent() || !II->getFunction())
2034
0
          break;
2035
2036
0
        Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
2037
0
        break;
2038
0
      }
2039
77.9M
      }
2040
77.9M
    }
2041
172M
    break;
2042
172M
  }
2043
172M
  case Instruction::ShuffleVector: {
2044
1.56M
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
2045
    // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
2046
1.56M
    if (!Shuf) {
2047
0
      Known.resetAll();
2048
0
      return;
2049
0
    }
2050
    // For undef elements, we don't know anything about the common state of
2051
    // the shuffle result.
2052
1.56M
    APInt DemandedLHS, DemandedRHS;
2053
1.56M
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
2054
9.42k
      Known.resetAll();
2055
9.42k
      return;
2056
9.42k
    }
2057
1.55M
    Known.One.setAllBits();
2058
1.55M
    Known.Zero.setAllBits();
2059
1.55M
    if (!!DemandedLHS) {
2060
1.36M
      const Value *LHS = Shuf->getOperand(0);
2061
1.36M
      computeKnownBits(LHS, DemandedLHS, Known, Q, Depth + 1);
2062
      // If we don't know any bits, early out.
2063
1.36M
      if (Known.isUnknown())
2064
1.10M
        break;
2065
1.36M
    }
2066
448k
    if (!!DemandedRHS) {
2067
204k
      const Value *RHS = Shuf->getOperand(1);
2068
204k
      computeKnownBits(RHS, DemandedRHS, Known2, Q, Depth + 1);
2069
204k
      Known = Known.intersectWith(Known2);
2070
204k
    }
2071
448k
    break;
2072
1.55M
  }
2073
470k
  case Instruction::InsertElement: {
2074
470k
    if (isa<ScalableVectorType>(I->getType())) {
2075
0
      Known.resetAll();
2076
0
      return;
2077
0
    }
2078
470k
    const Value *Vec = I->getOperand(0);
2079
470k
    const Value *Elt = I->getOperand(1);
2080
470k
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2081
470k
    unsigned NumElts = DemandedElts.getBitWidth();
2082
470k
    APInt DemandedVecElts = DemandedElts;
2083
470k
    bool NeedsElt = true;
2084
    // If we know the index we are inserting too, clear it from Vec check.
2085
470k
    if (CIdx && CIdx->getValue().ult(NumElts)) {
2086
470k
      DemandedVecElts.clearBit(CIdx->getZExtValue());
2087
470k
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
2088
470k
    }
2089
2090
470k
    Known.One.setAllBits();
2091
470k
    Known.Zero.setAllBits();
2092
470k
    if (NeedsElt) {
2093
447k
      computeKnownBits(Elt, Known, Q, Depth + 1);
2094
      // If we don't know any bits, early out.
2095
447k
      if (Known.isUnknown())
2096
233k
        break;
2097
447k
    }
2098
2099
237k
    if (!DemandedVecElts.isZero()) {
2100
31.0k
      computeKnownBits(Vec, DemandedVecElts, Known2, Q, Depth + 1);
2101
31.0k
      Known = Known.intersectWith(Known2);
2102
31.0k
    }
2103
237k
    break;
2104
470k
  }
2105
302k
  case Instruction::ExtractElement: {
2106
    // Look through extract element. If the index is non-constant or
2107
    // out-of-range demand all elements, otherwise just the extracted element.
2108
302k
    const Value *Vec = I->getOperand(0);
2109
302k
    const Value *Idx = I->getOperand(1);
2110
302k
    auto *CIdx = dyn_cast<ConstantInt>(Idx);
2111
302k
    if (isa<ScalableVectorType>(Vec->getType())) {
2112
      // FIXME: there's probably *something* we can do with scalable vectors
2113
0
      Known.resetAll();
2114
0
      break;
2115
0
    }
2116
302k
    unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2117
302k
    APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2118
302k
    if (CIdx && 
CIdx->getValue().ult(NumElts)302k
)
2119
302k
      DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2120
302k
    computeKnownBits(Vec, DemandedVecElts, Known, Q, Depth + 1);
2121
302k
    break;
2122
302k
  }
2123
30.2M
  case Instruction::ExtractValue:
2124
30.2M
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
2125
1.44M
      const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
2126
1.44M
      if (EVI->getNumIndices() != 1) 
break0
;
2127
1.44M
      if (EVI->getIndices()[0] == 0) {
2128
1.03M
        switch (II->getIntrinsicID()) {
2129
6.16k
        default: break;
2130
514k
        case Intrinsic::uadd_with_overflow:
2131
603k
        case Intrinsic::sadd_with_overflow:
2132
603k
          computeKnownBitsAddSub(
2133
603k
              true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2134
603k
              /* NUW=*/false, DemandedElts, Known, Known2, Q, Depth);
2135
603k
          break;
2136
0
        case Intrinsic::usub_with_overflow:
2137
21.6k
        case Intrinsic::ssub_with_overflow:
2138
21.6k
          computeKnownBitsAddSub(
2139
21.6k
              false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2140
21.6k
              /* NUW=*/false, DemandedElts, Known, Known2, Q, Depth);
2141
21.6k
          break;
2142
245k
        case Intrinsic::umul_with_overflow:
2143
404k
        case Intrinsic::smul_with_overflow:
2144
404k
          computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
2145
404k
                              false, DemandedElts, Known, Known2, Q, Depth);
2146
404k
          break;
2147
1.03M
        }
2148
1.03M
      }
2149
1.44M
    }
2150
30.2M
    break;
2151
30.2M
  case Instruction::Freeze:
2152
3.80M
    if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2153
3.80M
                                  Depth + 1))
2154
90.5k
      computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
2155
3.80M
    break;
2156
3.32G
  }
2157
3.32G
}
2158
2159
/// Determine which bits of V are known to be either zero or one and return
2160
/// them.
2161
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2162
173M
                                 const SimplifyQuery &Q, unsigned Depth) {
2163
173M
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
2164
173M
  ::computeKnownBits(V, DemandedElts, Known, Q, Depth);
2165
173M
  return Known;
2166
173M
}
2167
2168
/// Determine which bits of V are known to be either zero or one and return
2169
/// them.
2170
KnownBits llvm::computeKnownBits(const Value *V, const SimplifyQuery &Q,
2171
882M
                                 unsigned Depth) {
2172
882M
  KnownBits Known(getBitWidth(V->getType(), Q.DL));
2173
882M
  computeKnownBits(V, Known, Q, Depth);
2174
882M
  return Known;
2175
882M
}
2176
2177
/// Determine which bits of V are known to be either zero or one and return
/// them in the Known bit set.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here.  The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero.  If we don't change it to zero, other code could
/// optimized based on the contradictory assumption that it is non-zero.
/// Because instcombine aggressively folds operations with undef args anyway,
/// this won't lose us code quality.
///
/// This function is defined on values with integer type, values with pointer
/// type, and vectors of integers.  In the case
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the demanded elements in the vector specified by DemandedElts.
void computeKnownBits(const Value *V, const APInt &DemandedElts,
                      KnownBits &Known, const SimplifyQuery &Q,
                      unsigned Depth) {
  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(V && "No Value?");
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

#ifndef NDEBUG
  // Debug-only sanity checks: the DemandedElts mask and the Known bit width
  // must be consistent with V's type.
  Type *Ty = V->getType();
  unsigned BitWidth = Known.getBitWidth();

  assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
         "Not integer or pointer type!");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }

  Type *ScalarTy = Ty->getScalarType();
  if (ScalarTy->isPointerTy()) {
    assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  } else {
    assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
           "V and Known should have same BitWidth");
  }
#endif

  const APInt *C;
  if (match(V, m_APInt(C))) {
    // We know all of the bits for a scalar constant or a splat vector constant!
    Known = KnownBits::makeConstant(*C);
    return;
  }
  // Null and aggregate-zero are all-zeros.
  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
    Known.setAllZero();
    return;
  }
  // Handle a constant vector by taking the intersection of the known bits of
  // each element.
  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CDV must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
      // Only demanded lanes constrain the result.
      if (!DemandedElts[i])
        continue;
      APInt Elt = CDV->getElementAsAPInt(i);
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }

  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
    assert(!isa<ScalableVectorType>(V->getType()));
    // We know that CV must be a vector of integers. Take the intersection of
    // each element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
      if (!DemandedElts[i])
        continue;
      Constant *Element = CV->getAggregateElement(i);
      // Poison lanes impose no constraint.
      if (isa<PoisonValue>(Element))
        continue;
      auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
      if (!ElementCI) {
        Known.resetAll();
        return;
      }
      const APInt &Elt = ElementCI->getValue();
      Known.Zero &= ~Elt;
      Known.One &= Elt;
    }
    if (Known.hasConflict())
      Known.resetAll();
    return;
  }

  // Start out not knowing anything.
  Known.resetAll();

  // We can't imply anything about undefs.
  if (isa<UndefValue>(V))
    return;

  // There's no point in looking through other users of ConstantData for
  // assumptions.  Confirm that we've handled them all.
  assert(!isa<ConstantData>(V) && "Unhandled constant data!");

  // An argument may carry a range attribute that directly bounds its bits.
  if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      Known = Range->toKnownBits();

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
  // the bits of its aliasee.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
    if (!GA->isInterposable())
      computeKnownBits(GA->getAliasee(), Known, Q, Depth + 1);
    return;
  }

  if (const Operator *I = dyn_cast<Operator>(V))
    computeKnownBitsFromOperator(I, DemandedElts, Known, Q, Depth);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
      Known = CR->toKnownBits();
  }

  // Aligned pointers have trailing zeros - refine Known.Zero set
  if (isa<PointerType>(V->getType())) {
    Align Alignment = V->getPointerAlignment(Q.DL);
    Known.Zero.setLowBits(Log2(Alignment));
  }

  // computeKnownBitsFromContext strictly refines Known.
  // Therefore, we run them after computeKnownBitsFromOperator.

  // Check whether we can determine known bits from context such as assumes.
  computeKnownBitsFromContext(V, Known, Q, Depth);
}
2331
2332
/// Try to detect a recurrence that the value of the induction variable is
2333
/// always a power of two (or zero).
2334
static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2335
1.89M
                                   SimplifyQuery &Q, unsigned Depth) {
2336
1.89M
  BinaryOperator *BO = nullptr;
2337
1.89M
  Value *Start = nullptr, *Step = nullptr;
2338
1.89M
  if (!matchSimpleRecurrence(PN, BO, Start, Step))
2339
798k
    return false;
2340
2341
  // Initial value must be a power of two.
2342
1.78M
  
for (const Use &U : PN->operands())1.09M
{
2343
1.78M
    if (U.get() == Start) {
2344
      // Initial value comes from a different BB, need to adjust context
2345
      // instruction for analysis.
2346
1.09M
      Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2347
1.09M
      if (!isKnownToBeAPowerOfTwo(Start, OrZero, Q, Depth))
2348
1.08M
        return false;
2349
1.09M
    }
2350
1.78M
  }
2351
2352
  // Except for Mul, the induction variable must be on the left side of the
2353
  // increment expression, otherwise its value can be arbitrary.
2354
12.5k
  if (BO->getOpcode() != Instruction::Mul && 
BO->getOperand(1) != Step12.3k
)
2355
209
    return false;
2356
2357
12.3k
  Q.CxtI = BO->getParent()->getTerminator();
2358
12.3k
  switch (BO->getOpcode()) {
2359
253
  case Instruction::Mul:
2360
    // Power of two is closed under multiplication.
2361
253
    return (OrZero || 
Q.IIQ.hasNoUnsignedWrap(BO)0
||
2362
253
            
Q.IIQ.hasNoSignedWrap(BO)0
) &&
2363
253
           isKnownToBeAPowerOfTwo(Step, OrZero, Q, Depth);
2364
20
  case Instruction::SDiv:
2365
    // Start value must not be signmask for signed division, so simply being a
2366
    // power of two is not sufficient, and it has to be a constant.
2367
20
    if (!match(Start, m_Power2()) || 
match(Start, m_SignMask())0
)
2368
20
      return false;
2369
0
    [[fallthrough]];
2370
5
  case Instruction::UDiv:
2371
    // Divisor must be a power of two.
2372
    // If OrZero is false, cannot guarantee induction variable is non-zero after
2373
    // division, same for Shr, unless it is exact division.
2374
5
    return (OrZero || 
Q.IIQ.isExact(BO)0
) &&
2375
5
           isKnownToBeAPowerOfTwo(Step, false, Q, Depth);
2376
353
  case Instruction::Shl:
2377
353
    return OrZero || 
Q.IIQ.hasNoUnsignedWrap(BO)261
||
Q.IIQ.hasNoSignedWrap(BO)249
;
2378
0
  case Instruction::AShr:
2379
0
    if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2380
0
      return false;
2381
0
    [[fallthrough]];
2382
328
  case Instruction::LShr:
2383
328
    return OrZero || 
Q.IIQ.isExact(BO)31
;
2384
11.4k
  default:
2385
11.4k
    return false;
2386
12.3k
  }
2387
12.3k
}
2388
2389
/// Return true if we can infer that \p V is known to be a power of 2 from
2390
/// dominating condition \p Cond (e.g., ctpop(V) == 1).
2391
static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2392
                                             const Value *Cond,
2393
2.34M
                                             bool CondIsTrue) {
2394
2.34M
  CmpPredicate Pred;
2395
2.34M
  const APInt *RHSC;
2396
2.34M
  if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
2397
2.34M
                          m_APInt(RHSC))))
2398
2.30M
    return false;
2399
47.8k
  if (!CondIsTrue)
2400
23.7k
    Pred = ICmpInst::getInversePredicate(Pred);
2401
  // ctpop(V) u< 2
2402
47.8k
  if (OrZero && 
Pred == ICmpInst::ICMP_ULT47.7k
&&
*RHSC == 223.1k
)
2403
23.1k
    return true;
2404
  // ctpop(V) == 1
2405
24.6k
  return Pred == ICmpInst::ICMP_EQ && 
*RHSC == 1319
;
2406
47.8k
}
2407
2408
/// Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
                                  const SimplifyQuery &Q, unsigned Depth) {
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  // Constants (including splat vectors) can be pattern-matched directly.
  if (isa<Constant>(V))
    return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());

  // i1 is by definition a power of 2 or zero.
  if (OrZero && V->getType()->getScalarSizeInBits() == 1)
    return true;

  // Try to infer from assumptions.
  if (Q.AC && Q.CxtI) {
    for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
                                           /*CondIsTrue=*/true) &&
          isValidAssumeForContext(I, Q.CxtI, Q.DT))
        return true;
    }
  }

  // Handle dominating conditions.
  if (Q.DC && Q.CxtI && Q.DT) {
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      Value *Cond = BI->getCondition();

      // The true edge: the condition holds on the taken successor.
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
                                           /*CondIsTrue=*/true) &&
          Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        return true;

      // The false edge: the negated condition holds there.
      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
                                           /*CondIsTrue=*/false) &&
          Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        return true;
    }
  }

  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  if (Q.CxtI && match(V, m_VScale())) {
    const Function *F = Q.CxtI->getFunction();
    // The vscale_range indicates vscale is a power-of-two.
    return F->hasFnAttribute(Attribute::VScaleRange);
  }

  // 1 << X is clearly a power of two if the one is not shifted off the end.  If
  // it is shifted off the end then the result is undefined.
  if (match(I, m_Shl(m_One(), m_Value())))
    return true;

  // (signmask) >>l X is clearly a power of two if the one is not shifted off
  // the bottom.  If it is shifted off the bottom then the result is undefined.
  if (match(I, m_LShr(m_SignMask(), m_Value())))
    return true;

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth++ == MaxAnalysisRecursionDepth)
    return false;

  switch (I->getOpcode()) {
  case Instruction::ZExt:
    // Zero extension preserves the single set bit.
    return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth);
  case Instruction::Trunc:
    // Truncation may drop the set bit, leaving zero, so only valid for OrZero.
    return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth);
  case Instruction::Shl:
    // Shifting left preserves power-of-two-ness unless the bit wraps away.
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth);
    return false;
  case Instruction::LShr:
    // Shifting right may shift the bit out entirely unless exact.
    if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth);
    return false;
  case Instruction::UDiv:
    // Exact division of a power of two yields a power of two.
    if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
      return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth);
    return false;
  case Instruction::Mul:
    // Product of two powers of two is a power of two (if it doesn't vanish).
    return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Q, Depth) &&
           isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth) &&
           (OrZero || isKnownNonZero(I, Q, Depth));
  case Instruction::And:
    // A power of two and'd with anything is a power of two or zero.
    if (OrZero &&
        (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Q, Depth) ||
         isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Q, Depth)))
      return true;
    // X & (-X) is always a power of two or zero.
    if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
        match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
      return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
    return false;
  case Instruction::Add: {
    // Adding a power-of-two or zero to the same power-of-two or zero yields
    // either the original power-of-two, a larger power-of-two or zero.
    const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
        Q.IIQ.hasNoSignedWrap(VOBO)) {
      // (X & Y) + Y where Y is a power of two: the add cannot carry past Y.
      if (match(I->getOperand(0),
                m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Q, Depth))
        return true;
      if (match(I->getOperand(1),
                m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
          isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Q, Depth))
        return true;

      unsigned BitWidth = V->getType()->getScalarSizeInBits();
      KnownBits LHSBits(BitWidth);
      computeKnownBits(I->getOperand(0), LHSBits, Q, Depth);

      KnownBits RHSBits(BitWidth);
      computeKnownBits(I->getOperand(1), RHSBits, Q, Depth);
      // If i8 V is a power of two or zero:
      //  ZeroBits: 1 1 1 0 1 1 1 1
      // ~ZeroBits: 0 0 0 1 0 0 0 0
      if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
        // If OrZero isn't set, we cannot give back a zero result.
        // Make sure either the LHS or RHS has a bit set.
        if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
          return true;
    }

    // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
    if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
      if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
        return true;
    return false;
  }
  case Instruction::Select:
    // Both selected arms must qualify.
    return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Q, Depth) &&
           isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Q, Depth);
  case Instruction::PHI: {
    // A PHI node is power of two if all incoming values are power of two, or if
    // it is an induction variable where in each step its value is a power of
    // two.
    auto *PN = cast<PHINode>(I);
    SimplifyQuery RecQ = Q.getWithoutCondContext();

    // Check if it is an induction variable and always power of two.
    if (isPowerOfTwoRecurrence(PN, OrZero, RecQ, Depth))
      return true;

    // Recursively check all incoming values. Limit recursion to 2 levels, so
    // that search complexity is limited to number of operands^2.
    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
    return llvm::all_of(PN->operands(), [&](const Use &U) {
      // Value is power of 2 if it is coming from PHI node itself by induction.
      if (U.get() == PN)
        return true;

      // Change the context instruction to the incoming block where it is
      // evaluated.
      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
      return isKnownToBeAPowerOfTwo(U.get(), OrZero, RecQ, NewDepth);
    });
  }
  case Instruction::Invoke:
  case Instruction::Call: {
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::umax:
      case Intrinsic::smax:
      case Intrinsic::umin:
      case Intrinsic::smin:
        // min/max of two powers of two is one of its operands.
        return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Q, Depth) &&
               isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Q, Depth);
      // bswap/bitreverse just move around bits, but don't change any 1s/0s
      // thus don't change pow2/non-pow2 status.
      case Intrinsic::bitreverse:
      case Intrinsic::bswap:
        return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Q, Depth);
      case Intrinsic::fshr:
      case Intrinsic::fshl:
        // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
        if (II->getArgOperand(0) == II->getArgOperand(1))
          return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Q, Depth);
        break;
      default:
        break;
      }
    }
    return false;
  }
  default:
    return false;
  }
}
2607
2608
/// Test whether a GEP's result is known to be non-null.
2609
///
2610
/// Uses properties inherent in a GEP to try to determine whether it is known
2611
/// to be non-null.
2612
///
2613
/// Currently this routine does not support vector GEPs.
2614
static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
2615
31.5M
                              unsigned Depth) {
2616
31.5M
  const Function *F = nullptr;
2617
31.5M
  if (const Instruction *I = dyn_cast<Instruction>(GEP))
2618
31.4M
    F = I->getFunction();
2619
2620
  // If the gep is nuw or inbounds with invalid null pointer, then the GEP
2621
  // may be null iff the base pointer is null and the offset is zero.
2622
31.5M
  if (!GEP->hasNoUnsignedWrap() &&
2623
31.5M
      
!(8.10M
GEP->isInBounds()8.10M
&&
2624
8.10M
        
!NullPointerIsDefined(F, GEP->getPointerAddressSpace())6.46M
))
2625
1.66M
    return false;
2626
2627
  // FIXME: Support vector-GEPs.
2628
31.5M
  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2629
2630
  // If the base pointer is non-null, we cannot walk to a null address with an
2631
  // inbounds GEP in address space zero.
2632
29.9M
  if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2633
8.21M
    return true;
2634
2635
  // Walk the GEP operands and see if any operand introduces a non-zero offset.
2636
  // If so, then the GEP cannot produce a null pointer, as doing so would
2637
  // inherently violate the inbounds contract within address space zero.
2638
21.6M
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2639
31.4M
       GTI != GTE; 
++GTI9.79M
) {
2640
    // Struct types are easy -- they must always be indexed by a constant.
2641
21.7M
    if (StructType *STy = GTI.getStructTypeOrNull()) {
2642
16.1k
      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2643
16.1k
      unsigned ElementIdx = OpC->getZExtValue();
2644
16.1k
      const StructLayout *SL = Q.DL.getStructLayout(STy);
2645
16.1k
      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2646
16.1k
      if (ElementOffset > 0)
2647
13.9k
        return true;
2648
2.17k
      continue;
2649
16.1k
    }
2650
2651
    // If we have a zero-sized type, the index doesn't matter. Keep looping.
2652
21.7M
    if (GTI.getSequentialElementStride(Q.DL).isZero())
2653
35.5k
      continue;
2654
2655
    // Fast path the constant operand case both for efficiency and so we don't
2656
    // increment Depth when just zipping down an all-constant GEP.
2657
21.7M
    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2658
11.8M
      if (!OpC->isZero())
2659
11.8M
        return true;
2660
18.8k
      continue;
2661
11.8M
    }
2662
2663
    // We post-increment Depth here because while isKnownNonZero increments it
2664
    // as well, when we pop back up that increment won't persist. We don't want
2665
    // to recurse 10k times just because we have 10k GEP operands. We don't
2666
    // bail completely out because we want to handle constant GEPs regardless
2667
    // of depth.
2668
9.83M
    if (Depth++ >= MaxAnalysisRecursionDepth)
2669
4.05M
      continue;
2670
2671
5.77M
    if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2672
95.0k
      return true;
2673
5.77M
  }
2674
2675
9.72M
  return false;
2676
21.6M
}
2677
2678
/// Return true if some use of \p V, visible from the context instruction
/// \p CtxI under dominator tree \p DT, proves that \p V cannot be null.
/// Walks up to DomConditionsMaxUses uses of V looking for:
///  - call sites that pass V to a nonnull parameter,
///  - loads/stores through V (non-null when null is not a defined address),
///  - idiv/irem with V as the divisor (UB on zero divisor),
///  - icmp-controlled branches whose taken edge excludes null.
static bool isKnownNonNullFromDominatingCondition(const Value *V,
                                                  const Instruction *CtxI,
                                                  const DominatorTree *DT) {
  assert(!isa<Constant>(V) && "Called for constant?");

  // Both a context and a dominator tree are required to reason about
  // dominance; without them no use can be shown to precede CtxI.
  if (!CtxI || !DT)
    return false;

  unsigned NumUsesExplored = 0;
  for (auto &U : V->uses()) {
    // Avoid massive lists
    if (NumUsesExplored >= DomConditionsMaxUses)
      break;
    NumUsesExplored++;

    const Instruction *UI = cast<Instruction>(U.getUser());
    // If the value is used as an argument to a call or invoke, then argument
    // attributes may provide an answer about null-ness.
    if (V->getType()->isPointerTy()) {
      if (const auto *CB = dyn_cast<CallBase>(UI)) {
        // AllowUndefOrPoison=false: the attribute must guarantee non-null
        // even for undef/poison inputs for this conclusion to be sound.
        if (CB->isArgOperand(&U) &&
            CB->paramHasNonNullAttr(CB->getArgOperandNo(&U),
                                    /*AllowUndefOrPoison=*/false) &&
            DT->dominates(CB, CtxI))
          return true;
      }
    }

    // If the value is used as a load/store, then the pointer must be non null.
    if (V == getLoadStorePointerOperand(UI)) {
      if (!NullPointerIsDefined(UI->getFunction(),
                                V->getType()->getPointerAddressSpace()) &&
          DT->dominates(UI, CtxI))
        return true;
    }

    // Division/remainder by V is UB when V == 0, so any such use that is
    // valid at CtxI implies V is non-zero.
    if ((match(UI, m_IDiv(m_Value(), m_Specific(V))) ||
         match(UI, m_IRem(m_Value(), m_Specific(V)))) &&
        isValidAssumeForContext(UI, CtxI, DT))
      return true;

    // Consider only compare instructions uniquely controlling a branch
    Value *RHS;
    CmpPredicate Pred;
    if (!match(UI, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
      continue;

    // Determine whether the compare being true or being false excludes
    // V == 0; if neither direction does, this compare tells us nothing.
    bool NonNullIfTrue;
    if (cmpExcludesZero(Pred, RHS))
      NonNullIfTrue = true;
    else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
      NonNullIfTrue = false;
    else
      continue;

    // Walk the users of the compare, following through logical ANDs, to find
    // conditional branches (or guards) keyed on it.
    SmallVector<const User *, 4> WorkList;
    SmallPtrSet<const User *, 4> Visited;
    for (const auto *CmpU : UI->users()) {
      assert(WorkList.empty() && "Should be!");
      if (Visited.insert(CmpU).second)
        WorkList.push_back(CmpU);

      while (!WorkList.empty()) {
        auto *Curr = WorkList.pop_back_val();

        // If a user is an AND, add all its users to the work list. We only
        // propagate "pred != null" condition through AND because it is only
        // correct to assume that all conditions of AND are met in true branch.
        // TODO: Support similar logic of OR and EQ predicate?
        if (NonNullIfTrue)
          if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
            for (const auto *CurrU : Curr->users())
              if (Visited.insert(CurrU).second)
                WorkList.push_back(CurrU);
            continue;
          }

        if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
          assert(BI->isConditional() && "uses a comparison!");

          // Successor 0 is taken when the condition is true; pick whichever
          // successor corresponds to the non-null outcome.
          BasicBlock *NonNullSuccessor =
              BI->getSuccessor(NonNullIfTrue ? 0 : 1);
          BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
          // The edge must be unique, otherwise reaching the successor does
          // not imply the condition held.
          if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
            return true;
        } else if (NonNullIfTrue && isGuard(Curr) &&
                   DT->dominates(cast<Instruction>(Curr), CtxI)) {
          // A dominating guard on the condition implies it held at CtxI.
          return true;
        }
      }
    }
  }

  return false;
}
2773
2774
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to can never be \p Value?
static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
  // MD_range metadata is a flat list of [Lower, Upper) constant pairs.
  const unsigned NumRanges = Ranges->getNumOperands() / 2;
  assert(NumRanges >= 1);
  for (unsigned i = 0; i < NumRanges; ++i) {
    ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
    ConstantInt *Upper =
        mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
    ConstantRange Range(Lower->getValue(), Upper->getValue());
    // If any constituent range admits Value, the metadata does not
    // exclude it.
    if (Range.contains(Value))
      return false;
  }
  // Value lies outside every range, so the attached value can never be it.
  return true;
}
2791
2792
/// Try to detect a recurrence that monotonically increases/decreases from a
2793
/// non-zero starting value. These are common as induction variables.
2794
52.7M
static bool isNonZeroRecurrence(const PHINode *PN) {
2795
52.7M
  BinaryOperator *BO = nullptr;
2796
52.7M
  Value *Start = nullptr, *Step = nullptr;
2797
52.7M
  const APInt *StartC, *StepC;
2798
52.7M
  if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2799
52.7M
      
!match(Start, m_APInt(StartC))3.31M
||
StartC->isZero()1.86M
)
2800
52.4M
    return false;
2801
2802
306k
  switch (BO->getOpcode()) {
2803
272k
  case Instruction::Add:
2804
    // Starting from non-zero and stepping away from zero can never wrap back
2805
    // to zero.
2806
272k
    return BO->hasNoUnsignedWrap() ||
2807
272k
           
(98.6k
BO->hasNoSignedWrap()98.6k
&&
match(Step, m_APInt(StepC))61.2k
&&
2808
98.6k
            
StartC->isNegative() == StepC->isNegative()60.5k
);
2809
3.17k
  case Instruction::Mul:
2810
3.17k
    return (BO->hasNoUnsignedWrap() || 
BO->hasNoSignedWrap()2.22k
) &&
2811
3.17k
           
match(Step, m_APInt(StepC))2.84k
&&
!StepC->isZero()1.11k
;
2812
12.9k
  case Instruction::Shl:
2813
12.9k
    return BO->hasNoUnsignedWrap() || 
BO->hasNoSignedWrap()11.0k
;
2814
162
  case Instruction::AShr:
2815
9.30k
  case Instruction::LShr:
2816
9.30k
    return BO->isExact();
2817
9.25k
  default:
2818
9.25k
    return false;
2819
306k
  }
2820
306k
}
2821
2822
23.6M
// Match the pair (zext/sext(icmp eq X, 0), X) in either operand order.
static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
  // Helper: is Cast a zero/sign-extended "X == 0" test of the other operand?
  auto IsEqZeroExtOf = [](Value *Cast, Value *X) {
    return match(Cast, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
                                                   m_Specific(X), m_Zero())));
  };
  return IsEqZeroExtOf(Op0, Op1) || IsEqZeroExtOf(Op1, Op0);
}
2828
2829
/// Return true if X + Y is known non-zero for the demanded elements, given
/// the add's nsw/nuw flags.
static bool isNonZeroAdd(const APInt &DemandedElts, const SimplifyQuery &Q,
                         unsigned BitWidth, Value *X, Value *Y, bool NSW,
                         bool NUW, unsigned Depth) {
  // (X + (X != 0)) is non zero
  if (matchOpWithOpEqZero(X, Y))
    return true;

  // With nuw, the sum cannot wrap, so it is non-zero if either operand is.
  if (NUW)
    return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
           isKnownNonZero(X, DemandedElts, Q, Depth);

  KnownBits XKnown = computeKnownBits(X, DemandedElts, Q, Depth);
  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Q, Depth);

  // If X and Y are both non-negative (as signed values) then their sum is not
  // zero unless both X and Y are zero.
  if (XKnown.isNonNegative() && YKnown.isNonNegative())
    if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
        isKnownNonZero(X, DemandedElts, Q, Depth))
      return true;

  // If X and Y are both negative (as signed values) then their sum is not
  // zero unless both X and Y equal INT_MIN.
  if (XKnown.isNegative() && YKnown.isNegative()) {
    APInt Mask = APInt::getSignedMaxValue(BitWidth);
    // The sign bit of X is set.  If some other bit is set then X is not equal
    // to INT_MIN.
    if (XKnown.One.intersects(Mask))
      return true;
    // The sign bit of Y is set.  If some other bit is set then Y is not equal
    // to INT_MIN.
    if (YKnown.One.intersects(Mask))
      return true;
  }

  // The sum of a non-negative number and a power of two is not zero.
  if (XKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Q, Depth))
    return true;
  if (YKnown.isNonNegative() &&
      isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Q, Depth))
    return true;

  // Fall back to generic known-bits addition of the two operands.
  return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
}
2874
2875
static bool isNonZeroSub(const APInt &DemandedElts, const SimplifyQuery &Q,
2876
                         unsigned BitWidth, Value *X, Value *Y,
2877
8.15M
                         unsigned Depth) {
2878
  // (X - (X != 0)) is non zero
2879
  // ((X != 0) - X) is non zero
2880
8.15M
  if (matchOpWithOpEqZero(X, Y))
2881
0
    return true;
2882
2883
  // TODO: Move this case into isKnownNonEqual().
2884
8.15M
  if (auto *C = dyn_cast<Constant>(X))
2885
1.02M
    if (C->isNullValue() && 
isKnownNonZero(Y, DemandedElts, Q, Depth)478k
)
2886
15.8k
      return true;
2887
2888
8.14M
  return ::isKnownNonEqual(X, Y, DemandedElts, Q, Depth);
2889
8.15M
}
2890
2891
/// Return true if X * Y is known non-zero for the demanded elements, given
/// the multiply's nsw/nuw flags.
static bool isNonZeroMul(const APInt &DemandedElts, const SimplifyQuery &Q,
                         unsigned BitWidth, Value *X, Value *Y, bool NSW,
                         bool NUW, unsigned Depth) {
  // If X and Y are non-zero then so is X * Y as long as the multiplication
  // does not overflow.
  if (NSW || NUW)
    return isKnownNonZero(X, DemandedElts, Q, Depth) &&
           isKnownNonZero(Y, DemandedElts, Q, Depth);

  // If either X or Y is odd, then if the other is non-zero the result can't
  // be zero.
  KnownBits XKnown = computeKnownBits(X, DemandedElts, Q, Depth);
  if (XKnown.One[0])
    return isKnownNonZero(Y, DemandedElts, Q, Depth);

  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Q, Depth);
  if (YKnown.One[0])
    // Reuse XKnown if it already proves X non-zero before recursing.
    return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);

  // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
  // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
  // the lowest known One of X and Y. If they are non-zero, the result
  // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
  // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
  return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
         BitWidth;
}
2918
2919
/// Return true if the shift \p I (shl/lshr/ashr) is known non-zero, given
/// \p KnownVal, the known bits of its shifted operand.
static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
                           const SimplifyQuery &Q, const KnownBits &KnownVal,
                           unsigned Depth) {
  // Apply I's shift operation to an arbitrary APInt.
  auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.shl(Rhs);
    case Instruction::LShr:
      return Lhs.lshr(Rhs);
    case Instruction::AShr:
      return Lhs.ashr(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  // Shift in the opposite direction of I, used below to isolate the bits
  // that I shifts out.
  auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
    switch (I->getOpcode()) {
    case Instruction::Shl:
      return Lhs.lshr(Rhs);
    case Instruction::LShr:
    case Instruction::AShr:
      return Lhs.shl(Rhs);
    default:
      llvm_unreachable("Unknown Shift Opcode");
    }
  };

  // Nothing is known about the value; no conclusion is possible.
  if (KnownVal.isUnknown())
    return false;

  KnownBits KnownCnt =
      computeKnownBits(I->getOperand(1), DemandedElts, Q, Depth);
  APInt MaxShift = KnownCnt.getMaxValue();
  unsigned NumBits = KnownVal.getBitWidth();
  // A shift amount >= the bit width may be poison; give up.
  if (MaxShift.uge(NumBits))
    return false;

  // If a known-one bit survives even the maximal possible shift, the result
  // is non-zero.
  if (!ShiftOp(KnownVal.One, MaxShift).isZero())
    return true;

  // If all of the bits shifted out are known to be zero, and Val is known
  // non-zero then at least one non-zero bit must remain.
  if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
          .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
      isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
    return true;

  return false;
}
2969
2970
static bool isKnownNonZeroFromOperator(const Operator *I,
2971
                                       const APInt &DemandedElts,
2972
531M
                                       const SimplifyQuery &Q, unsigned Depth) {
2973
531M
  unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2974
531M
  switch (I->getOpcode()) {
2975
42.5M
  case Instruction::Alloca:
2976
    // Alloca never returns null, malloc might.
2977
42.5M
    return I->getType()->getPointerAddressSpace() == 0;
2978
31.5M
  case Instruction::GetElementPtr:
2979
31.5M
    if (I->getType()->isPointerTy())
2980
31.5M
      return isGEPKnownNonNull(cast<GEPOperator>(I), Q, Depth);
2981
0
    break;
2982
1.61M
  case Instruction::BitCast: {
2983
    // We need to be a bit careful here. We can only peek through the bitcast
2984
    // if the scalar size of elements in the operand are smaller than and a
2985
    // multiple of the size they are casting too. Take three cases:
2986
    //
2987
    // 1) Unsafe:
2988
    //        bitcast <2 x i16> %NonZero to <4 x i8>
2989
    //
2990
    //    %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2991
    //    <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2992
    //    guranteed (imagine just sign bit set in the 2 i16 elements).
2993
    //
2994
    // 2) Unsafe:
2995
    //        bitcast <4 x i3> %NonZero to <3 x i4>
2996
    //
2997
    //    Even though the scalar size of the src (`i3`) is smaller than the
2998
    //    scalar size of the dst `i4`, because `i3` is not a multiple of `i4`
2999
    //    its possible for the `3 x i4` elements to be zero because there are
3000
    //    some elements in the destination that don't contain any full src
3001
    //    element.
3002
    //
3003
    // 3) Safe:
3004
    //        bitcast <4 x i8> %NonZero to <2 x i16>
3005
    //
3006
    //    This is always safe as non-zero in the 4 i8 elements implies
3007
    //    non-zero in the combination of any two adjacent ones. Since i8 is a
3008
    //    multiple of i16, each i16 is guranteed to have 2 full i8 elements.
3009
    //    This all implies the 2 i16 elements are non-zero.
3010
1.61M
    Type *FromTy = I->getOperand(0)->getType();
3011
1.61M
    if ((FromTy->isIntOrIntVectorTy() || 
FromTy->isPtrOrPtrVectorTy()2.32k
) &&
3012
1.61M
        
(BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 01.61M
)
3013
1.61M
      return isKnownNonZero(I->getOperand(0), Q, Depth);
3014
1.61M
  } 
break7.37k
;
3015
1.70M
  case Instruction::IntToPtr:
3016
    // Note that we have to take special care to avoid looking through
3017
    // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
3018
    // as casts that can alter the value, e.g., AddrSpaceCasts.
3019
1.70M
    if (!isa<ScalableVectorType>(I->getType()) &&
3020
1.70M
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
3021
1.70M
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
3022
1.70M
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3023
0
    break;
3024
1.26M
  case Instruction::PtrToInt:
3025
    // Similar to int2ptr above, we can look through ptr2int here if the cast
3026
    // is a no-op or an extend and not a truncate.
3027
1.26M
    if (!isa<ScalableVectorType>(I->getType()) &&
3028
1.26M
        Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
3029
1.26M
            Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
3030
1.26M
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3031
0
    break;
3032
2.83M
  case Instruction::Trunc:
3033
    // nuw/nsw trunc preserves zero/non-zero status of input.
3034
2.83M
    if (auto *TI = dyn_cast<TruncInst>(I))
3035
2.83M
      if (TI->hasNoSignedWrap() || 
TI->hasNoUnsignedWrap()2.69M
)
3036
245k
        return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
3037
2.59M
    break;
3038
3039
  // Iff x - y != 0, then x ^ y != 0
3040
  // Therefore we can do the same exact checks
3041
2.59M
  case Instruction::Xor:
3042
8.15M
  case Instruction::Sub:
3043
8.15M
    return isNonZeroSub(DemandedElts, Q, BitWidth, I->getOperand(0),
3044
8.15M
                        I->getOperand(1), Depth);
3045
865k
  case Instruction::Or:
3046
    // (X | (X != 0)) is non zero
3047
865k
    if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
3048
0
      return true;
3049
    // X | Y != 0 if X != Y.
3050
865k
    if (isKnownNonEqual(I->getOperand(0), I->getOperand(1), DemandedElts, Q,
3051
865k
                        Depth))
3052
258k
      return true;
3053
    // X | Y != 0 if X != 0 or Y != 0.
3054
606k
    return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
3055
606k
           
isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth)566k
;
3056
1.17M
  case Instruction::SExt:
3057
8.14M
  case Instruction::ZExt:
3058
    // ext X != 0 if X != 0.
3059
8.14M
    return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3060
3061
2.15M
  case Instruction::Shl: {
3062
    // shl nsw/nuw can't remove any non-zero bits.
3063
2.15M
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3064
2.15M
    if (Q.IIQ.hasNoUnsignedWrap(BO) || 
Q.IIQ.hasNoSignedWrap(BO)1.00M
)
3065
1.49M
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3066
3067
    // shl X, Y != 0 if X is odd.  Note that the value of the shift is undefined
3068
    // if the lowest bit is shifted off the end.
3069
662k
    KnownBits Known(BitWidth);
3070
662k
    computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth);
3071
662k
    if (Known.One[0])
3072
1.43k
      return true;
3073
3074
661k
    return isNonZeroShift(I, DemandedElts, Q, Known, Depth);
3075
662k
  }
3076
2.59M
  case Instruction::LShr:
3077
4.24M
  case Instruction::AShr: {
3078
    // shr exact can only shift out zero bits.
3079
4.24M
    const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
3080
4.24M
    if (BO->isExact())
3081
1.62M
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3082
3083
    // shr X, Y != 0 if X is negative.  Note that the value of the shift is not
3084
    // defined if the sign bit is shifted off the end.
3085
2.61M
    KnownBits Known =
3086
2.61M
        computeKnownBits(I->getOperand(0), DemandedElts, Q, Depth);
3087
2.61M
    if (Known.isNegative())
3088
241
      return true;
3089
3090
2.61M
    return isNonZeroShift(I, DemandedElts, Q, Known, Depth);
3091
2.61M
  }
3092
132k
  case Instruction::UDiv:
3093
655k
  case Instruction::SDiv: {
3094
    // X / Y
3095
    // div exact can only produce a zero if the dividend is zero.
3096
655k
    if (cast<PossiblyExactOperator>(I)->isExact())
3097
418k
      return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3098
3099
237k
    KnownBits XKnown =
3100
237k
        computeKnownBits(I->getOperand(0), DemandedElts, Q, Depth);
3101
    // If X is fully unknown we won't be able to figure anything out so don't
3102
    // both computing knownbits for Y.
3103
237k
    if (XKnown.isUnknown())
3104
212k
      return false;
3105
3106
24.4k
    KnownBits YKnown =
3107
24.4k
        computeKnownBits(I->getOperand(1), DemandedElts, Q, Depth);
3108
24.4k
    if (I->getOpcode() == Instruction::SDiv) {
3109
      // For signed division need to compare abs value of the operands.
3110
8.98k
      XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
3111
8.98k
      YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
3112
8.98k
    }
3113
    // If X u>= Y then div is non zero (0/0 is UB).
3114
24.4k
    std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
3115
    // If X is total unknown or X u< Y we won't be able to prove non-zero
3116
    // with compute known bits so just return early.
3117
24.4k
    return XUgeY && 
*XUgeY489
;
3118
237k
  }
3119
14.4M
  case Instruction::Add: {
3120
    // X + Y.
3121
3122
    // If Add has nuw wrap flag, then if either X or Y is non-zero the result is
3123
    // non-zero.
3124
14.4M
    auto *BO = cast<OverflowingBinaryOperator>(I);
3125
14.4M
    return isNonZeroAdd(DemandedElts, Q, BitWidth, I->getOperand(0),
3126
14.4M
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3127
14.4M
                        Q.IIQ.hasNoUnsignedWrap(BO), Depth);
3128
237k
  }
3129
953k
  case Instruction::Mul: {
3130
953k
    const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3131
953k
    return isNonZeroMul(DemandedElts, Q, BitWidth, I->getOperand(0),
3132
953k
                        I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3133
953k
                        Q.IIQ.hasNoUnsignedWrap(BO), Depth);
3134
237k
  }
3135
8.06M
  case Instruction::Select: {
3136
    // (C ? X : Y) != 0 if X != 0 and Y != 0.
3137
3138
    // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
3139
    // then see if the select condition implies the arm is non-zero. For example
3140
    // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3141
    // dominated by `X != 0`.
3142
11.8M
    auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3143
11.8M
      Value *Op;
3144
11.8M
      Op = IsTrueArm ? 
I->getOperand(1)8.06M
:
I->getOperand(2)3.78M
;
3145
      // Op is trivially non-zero.
3146
11.8M
      if (isKnownNonZero(Op, DemandedElts, Q, Depth))
3147
3.82M
        return true;
3148
3149
      // The condition of the select dominates the true/false arm. Check if the
3150
      // condition implies that a given arm is non-zero.
3151
8.03M
      Value *X;
3152
8.03M
      CmpPredicate Pred;
3153
8.03M
      if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
3154
7.17M
        return false;
3155
3156
857k
      if (!IsTrueArm)
3157
304k
        Pred = ICmpInst::getInversePredicate(Pred);
3158
3159
857k
      return cmpExcludesZero(Pred, X);
3160
8.03M
    };
3161
3162
8.06M
    if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3163
8.06M
        
SelectArmIsNonZero(/* IsTrueArm */ false)3.78M
)
3164
309k
      return true;
3165
7.75M
    break;
3166
8.06M
  }
3167
52.7M
  case Instruction::PHI: {
3168
52.7M
    auto *PN = cast<PHINode>(I);
3169
52.7M
    if (Q.IIQ.UseInstrInfo && 
isNonZeroRecurrence(PN)52.7M
)
3170
188k
      return true;
3171
3172
    // Check if all incoming values are non-zero using recursion.
3173
52.5M
    SimplifyQuery RecQ = Q.getWithoutCondContext();
3174
52.5M
    unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
3175
64.6M
    return llvm::all_of(PN->operands(), [&](const Use &U) {
3176
64.6M
      if (U.get() == PN)
3177
33.1k
        return true;
3178
64.6M
      RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3179
      // Check if the branch on the phi excludes zero.
3180
64.6M
      CmpPredicate Pred;
3181
64.6M
      Value *X;
3182
64.6M
      BasicBlock *TrueSucc, *FalseSucc;
3183
64.6M
      if (match(RecQ.CxtI,
3184
64.6M
                m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
3185
64.6M
                     m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
3186
        // Check for cases of duplicate successors.
3187
6.10M
        if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3188
          // If we're using the false successor, invert the predicate.
3189
6.10M
          if (FalseSucc == PN->getParent())
3190
4.46M
            Pred = CmpInst::getInversePredicate(Pred);
3191
6.10M
          if (cmpExcludesZero(Pred, X))
3192
1.11M
            return true;
3193
6.10M
        }
3194
6.10M
      }
3195
      // Finally recurse on the edge and check it directly.
3196
63.5M
      return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
3197
64.6M
    });
3198
52.7M
  }
3199
3.67k
  case Instruction::InsertElement: {
3200
3.67k
    if (isa<ScalableVectorType>(I->getType()))
3201
0
      break;
3202
3203
3.67k
    const Value *Vec = I->getOperand(0);
3204
3.67k
    const Value *Elt = I->getOperand(1);
3205
3.67k
    auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
3206
3207
3.67k
    unsigned NumElts = DemandedElts.getBitWidth();
3208
3.67k
    APInt DemandedVecElts = DemandedElts;
3209
3.67k
    bool SkipElt = false;
3210
    // If we know the index we are inserting too, clear it from Vec check.
3211
3.67k
    if (CIdx && CIdx->getValue().ult(NumElts)) {
3212
3.67k
      DemandedVecElts.clearBit(CIdx->getZExtValue());
3213
3.67k
      SkipElt = !DemandedElts[CIdx->getZExtValue()];
3214
3.67k
    }
3215
3216
    // Result is zero if Elt is non-zero and rest of the demanded elts in Vec
3217
    // are non-zero.
3218
3.67k
    return (SkipElt || 
isKnownNonZero(Elt, Q, Depth)3.66k
) &&
3219
3.67k
           
(68
DemandedVecElts.isZero()68
||
3220
68
            
isKnownNonZero(Vec, DemandedVecElts, Q, Depth)8
);
3221
3.67k
  }
3222
9.78k
  case Instruction::ExtractElement:
3223
9.78k
    if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
3224
9.78k
      const Value *Vec = EEI->getVectorOperand();
3225
9.78k
      const Value *Idx = EEI->getIndexOperand();
3226
9.78k
      auto *CIdx = dyn_cast<ConstantInt>(Idx);
3227
9.78k
      if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
3228
9.78k
        unsigned NumElts = VecTy->getNumElements();
3229
9.78k
        APInt DemandedVecElts = APInt::getAllOnes(NumElts);
3230
9.78k
        if (CIdx && CIdx->getValue().ult(NumElts))
3231
9.78k
          DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
3232
9.78k
        return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
3233
9.78k
      }
3234
9.78k
    }
3235
0
    break;
3236
5.05k
  case Instruction::ShuffleVector: {
3237
5.05k
    auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3238
5.05k
    if (!Shuf)
3239
0
      break;
3240
5.05k
    APInt DemandedLHS, DemandedRHS;
3241
    // For undef elements, we don't know anything about the common state of
3242
    // the shuffle result.
3243
5.05k
    if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3244
300
      break;
3245
    // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3246
4.75k
    return (DemandedRHS.isZero() ||
3247
4.75k
            
isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)811
) &&
3248
4.75k
           
(3.94k
DemandedLHS.isZero()3.94k
||
3249
3.94k
            isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3250
5.05k
  }
3251
575k
  case Instruction::Freeze:
3252
575k
    return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3253
575k
           isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3254
555
                                     Depth);
3255
227M
  case Instruction::Load: {
3256
227M
    auto *LI = cast<LoadInst>(I);
3257
    // A Load tagged with nonnull or dereferenceable with null pointer undefined
3258
    // is never null.
3259
227M
    if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3260
157M
      if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3261
157M
          
(153M
Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable)153M
&&
3262
153M
           
!NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())0
))
3263
4.34M
        return true;
3264
157M
    } else 
if (MDNode *69.9M
Ranges69.9M
= Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3265
12.2M
      return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3266
12.2M
    }
3267
3268
    // No need to fall through to computeKnownBits as range metadata is already
3269
    // handled in isKnownNonZero.
3270
211M
    return false;
3271
227M
  }
3272
15.8M
  case Instruction::ExtractValue: {
3273
15.8M
    const WithOverflowInst *WO;
3274
15.8M
    if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3275
49.6k
      switch (WO->getBinaryOp()) {
3276
0
      default:
3277
0
        break;
3278
40.8k
      case Instruction::Add:
3279
40.8k
        return isNonZeroAdd(DemandedElts, Q, BitWidth, WO->getArgOperand(0),
3280
40.8k
                            WO->getArgOperand(1),
3281
40.8k
                            /*NSW=*/false,
3282
40.8k
                            /*NUW=*/false, Depth);
3283
97
      case Instruction::Sub:
3284
97
        return isNonZeroSub(DemandedElts, Q, BitWidth, WO->getArgOperand(0),
3285
97
                            WO->getArgOperand(1), Depth);
3286
8.66k
      case Instruction::Mul:
3287
8.66k
        return isNonZeroMul(DemandedElts, Q, BitWidth, WO->getArgOperand(0),
3288
8.66k
                            WO->getArgOperand(1),
3289
8.66k
                            /*NSW=*/false, /*NUW=*/false, Depth);
3290
0
        break;
3291
49.6k
      }
3292
49.6k
    }
3293
15.8M
    break;
3294
15.8M
  }
3295
56.6M
  case Instruction::Call:
3296
64.7M
  case Instruction::Invoke: {
3297
64.7M
    const auto *Call = cast<CallBase>(I);
3298
64.7M
    if (I->getType()->isPointerTy()) {
3299
45.1M
      if (Call->isReturnNonNull())
3300
5.50M
        return true;
3301
39.6M
      if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3302
831
        return isKnownNonZero(RP, Q, Depth);
3303
39.6M
    } else {
3304
19.5M
      if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3305
139k
        return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3306
19.3M
      if (std::optional<ConstantRange> Range = Call->getRange()) {
3307
1.61M
        const APInt ZeroValue(Range->getBitWidth(), 0);
3308
1.61M
        if (!Range->contains(ZeroValue))
3309
8.83k
          return true;
3310
1.61M
      }
3311
19.3M
      if (const Value *RV = Call->getReturnedArgOperand())
3312
20
        if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3313
3
          return true;
3314
19.3M
    }
3315
3316
59.0M
    if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3317
6.66M
      switch (II->getIntrinsicID()) {
3318
0
      case Intrinsic::sshl_sat:
3319
0
      case Intrinsic::ushl_sat:
3320
10.1k
      case Intrinsic::abs:
3321
10.2k
      case Intrinsic::bitreverse:
3322
66.2k
      case Intrinsic::bswap:
3323
70.1k
      case Intrinsic::ctpop:
3324
70.1k
        return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3325
        // NB: We don't do usub_sat here as in any case we can prove its
3326
        // non-zero, we will fold it to `sub nuw` in InstCombine.
3327
6
      case Intrinsic::ssub_sat:
3328
6
        return isNonZeroSub(DemandedElts, Q, BitWidth, II->getArgOperand(0),
3329
6
                            II->getArgOperand(1), Depth);
3330
45
      case Intrinsic::sadd_sat:
3331
45
        return isNonZeroAdd(DemandedElts, Q, BitWidth, II->getArgOperand(0),
3332
45
                            II->getArgOperand(1),
3333
45
                            /*NSW=*/true, /* NUW=*/false, Depth);
3334
        // Vec reverse preserves zero/non-zero status from input vec.
3335
0
      case Intrinsic::vector_reverse:
3336
0
        return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3337
0
                              Q, Depth);
3338
        // umin/smin/smax/smin/or of all non-zero elements is always non-zero.
3339
101
      case Intrinsic::vector_reduce_or:
3340
101
      case Intrinsic::vector_reduce_umax:
3341
101
      case Intrinsic::vector_reduce_umin:
3342
101
      case Intrinsic::vector_reduce_smax:
3343
101
      case Intrinsic::vector_reduce_smin:
3344
101
        return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3345
131k
      case Intrinsic::umax:
3346
133k
      case Intrinsic::uadd_sat:
3347
        // umax(X, (X != 0)) is non zero
3348
        // X +usat (X != 0) is non zero
3349
133k
        if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3350
0
          return true;
3351
3352
133k
        return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3353
133k
               
isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth)91.7k
;
3354
65.0k
      case Intrinsic::smax: {
3355
        // If either arg is strictly positive the result is non-zero. Otherwise
3356
        // the result is non-zero if both ops are non-zero.
3357
65.0k
        auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3358
99.2k
                             const KnownBits &OpKnown) {
3359
99.2k
          if (!OpNonZero.has_value())
3360
77.1k
            OpNonZero = OpKnown.isNonZero() ||
3361
77.1k
                        
isKnownNonZero(Op, DemandedElts, Q, Depth)42.8k
;
3362
99.2k
          return *OpNonZero;
3363
99.2k
        };
3364
        // Avoid re-computing isKnownNonZero.
3365
65.0k
        std::optional<bool> Op0NonZero, Op1NonZero;
3366
65.0k
        KnownBits Op1Known =
3367
65.0k
            computeKnownBits(II->getArgOperand(1), DemandedElts, Q, Depth);
3368
65.0k
        if (Op1Known.isNonNegative() &&
3369
65.0k
            
IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known)44.7k
)
3370
22.2k
          return true;
3371
42.7k
        KnownBits Op0Known =
3372
42.7k
            computeKnownBits(II->getArgOperand(0), DemandedElts, Q, Depth);
3373
42.7k
        if (Op0Known.isNonNegative() &&
3374
42.7k
            
IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known)1.31k
)
3375
1.03k
          return true;
3376
41.7k
        return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3377
41.7k
               
IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known)11.5k
;
3378
42.7k
      }
3379
158k
      case Intrinsic::smin: {
3380
        // If either arg is negative the result is non-zero. Otherwise
3381
        // the result is non-zero if both ops are non-zero.
3382
158k
        KnownBits Op1Known =
3383
158k
            computeKnownBits(II->getArgOperand(1), DemandedElts, Q, Depth);
3384
158k
        if (Op1Known.isNegative())
3385
9
          return true;
3386
158k
        KnownBits Op0Known =
3387
158k
            computeKnownBits(II->getArgOperand(0), DemandedElts, Q, Depth);
3388
158k
        if (Op0Known.isNegative())
3389
1
          return true;
3390
3391
158k
        if (Op1Known.isNonZero() && 
Op0Known.isNonZero()18.3k
)
3392
96
          return true;
3393
158k
      }
3394
158k
        [[fallthrough]];
3395
5.49M
      case Intrinsic::umin:
3396
5.49M
        return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3397
5.49M
               
isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth)33.2k
;
3398
468k
      case Intrinsic::cttz:
3399
468k
        return computeKnownBits(II->getArgOperand(0), DemandedElts, Q, Depth)
3400
468k
            .Zero[0];
3401
347k
      case Intrinsic::ctlz:
3402
347k
        return computeKnownBits(II->getArgOperand(0), DemandedElts, Q, Depth)
3403
347k
            .isNonNegative();
3404
4.95k
      case Intrinsic::fshr:
3405
12.0k
      case Intrinsic::fshl:
3406
        // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3407
12.0k
        if (II->getArgOperand(0) == II->getArgOperand(1))
3408
10.9k
          return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3409
1.16k
        break;
3410
1.16k
      case Intrinsic::vscale:
3411
0
        return true;
3412
0
      case Intrinsic::experimental_get_vector_length:
3413
0
        return isKnownNonZero(I->getOperand(0), Q, Depth);
3414
78.1k
      default:
3415
78.1k
        break;
3416
6.66M
      }
3417
79.3k
      break;
3418
6.66M
    }
3419
3420
52.3M
    return false;
3421
59.0M
  }
3422
531M
  }
3423
3424
66.4M
  KnownBits Known(BitWidth);
3425
66.4M
  computeKnownBits(I, DemandedElts, Known, Q, Depth);
3426
66.4M
  return Known.One != 0;
3427
531M
}
3428
3429
/// Return true if the given value is known to be non-zero when defined. For
3430
/// vectors, return true if every demanded element is known to be non-zero when
3431
/// defined. For pointers, if the context instruction and dominator tree are
3432
/// specified, perform context-sensitive analysis and return true if the
3433
/// pointer couldn't possibly be null at the specified instruction.
3434
/// Supports values with integer or pointer type and vectors of integers.
3435
bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3436
640M
                    const SimplifyQuery &Q, unsigned Depth) {
3437
640M
  Type *Ty = V->getType();
3438
3439
#ifndef NDEBUG
3440
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3441
3442
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3443
    assert(
3444
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3445
        "DemandedElt width should equal the fixed vector number of elements");
3446
  } else {
3447
    assert(DemandedElts == APInt(1, 1) &&
3448
           "DemandedElt width should be 1 for scalars");
3449
  }
3450
#endif
3451
3452
640M
  if (auto *C = dyn_cast<Constant>(V)) {
3453
28.1M
    if (C->isNullValue())
3454
15.8M
      return false;
3455
12.2M
    if (isa<ConstantInt>(C))
3456
      // Must be non-zero due to null test above.
3457
6.96M
      return true;
3458
3459
    // For constant vectors, check that all elements are poison or known
3460
    // non-zero to determine that the whole vector is known non-zero.
3461
5.31M
    if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3462
14.5k
      for (unsigned i = 0, e = VecTy->getNumElements(); i != e; 
++i9.00k
) {
3463
13.7k
        if (!DemandedElts[i])
3464
2.99k
          continue;
3465
10.7k
        Constant *Elt = C->getAggregateElement(i);
3466
10.7k
        if (!Elt || Elt->isNullValue())
3467
4.74k
          return false;
3468
6.01k
        if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3469
13
          return false;
3470
6.01k
      }
3471
797
      return true;
3472
5.55k
    }
3473
3474
    // Constant ptrauth can be null, iff the base pointer can be.
3475
5.31M
    if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3476
0
      return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3477
3478
    // A global variable in address space 0 is non null unless extern weak
3479
    // or an absolute symbol reference. Other address spaces may have null as a
3480
    // valid address for a global, so we can't assume anything.
3481
5.31M
    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3482
4.30M
      if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3483
4.30M
          
GV->getType()->getAddressSpace() == 04.20M
)
3484
4.20M
        return true;
3485
4.30M
    }
3486
3487
    // For constant expressions, fall through to the Operator code below.
3488
1.11M
    if (!isa<ConstantExpr>(V))
3489
921k
      return false;
3490
1.11M
  }
3491
3492
612M
  if (const auto *A = dyn_cast<Argument>(V))
3493
62.0M
    if (std::optional<ConstantRange> Range = A->getRange()) {
3494
341k
      const APInt ZeroValue(Range->getBitWidth(), 0);
3495
341k
      if (!Range->contains(ZeroValue))
3496
85.1k
        return true;
3497
341k
    }
3498
3499
612M
  if (!isa<Constant>(V) && 
isKnownNonZeroFromAssume(V, Q)612M
)
3500
817k
    return true;
3501
3502
  // Some of the tests below are recursive, so bail out if we hit the limit.
3503
612M
  if (Depth++ >= MaxAnalysisRecursionDepth)
3504
19.6M
    return false;
3505
3506
  // Check for pointer simplifications.
3507
3508
592M
  if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3509
    // A byval, inalloca may not be null in a non-default addres space. A
3510
    // nonnull argument is assumed never 0.
3511
389M
    if (const Argument *A = dyn_cast<Argument>(V)) {
3512
52.9M
      if (((A->hasPassPointeeByValueCopyAttr() &&
3513
52.9M
            
!NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())59.8k
) ||
3514
52.9M
           
A->hasNonNullAttr()52.8M
))
3515
8.26M
        return true;
3516
52.9M
    }
3517
389M
  }
3518
3519
584M
  if (const auto *I = dyn_cast<Operator>(V))
3520
531M
    if (isKnownNonZeroFromOperator(I, DemandedElts, Q, Depth))
3521
79.7M
      return true;
3522
3523
504M
  if (!isa<Constant>(V) &&
3524
504M
      
isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)504M
)
3525
6.96M
    return true;
3526
3527
497M
  if (const Value *Stripped = stripNullTest(V))
3528
5.16k
    return isKnownNonZero(Stripped, DemandedElts, Q, Depth);
3529
3530
497M
  return false;
3531
497M
}
3532
3533
bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3534
531M
                          unsigned Depth) {
3535
531M
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3536
531M
  APInt DemandedElts =
3537
531M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())1.62M
:
APInt(1, 1)529M
;
3538
531M
  return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3539
531M
}
3540
3541
/// If the pair of operators are the same invertible function, return the
3542
/// the operands of the function corresponding to each input. Otherwise,
3543
/// return std::nullopt.  An invertible function is one that is 1-to-1 and maps
3544
/// every input value to exactly one output value.  This is equivalent to
3545
/// saying that Op1 and Op2 are equal exactly when the specified pair of
3546
/// operands are equal, (except that Op1 and Op2 may be poison more often.)
3547
static std::optional<std::pair<Value*, Value*>>
3548
getInvertibleOperands(const Operator *Op1,
3549
43.2M
                      const Operator *Op2) {
3550
43.2M
  if (Op1->getOpcode() != Op2->getOpcode())
3551
453
    return std::nullopt;
3552
3553
43.2M
  auto getOperands = [&](unsigned OpNum) -> auto {
3554
429k
    return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3555
429k
  };
3556
3557
43.2M
  switch (Op1->getOpcode()) {
3558
38.5M
  default:
3559
38.5M
    break;
3560
38.5M
  case Instruction::Or:
3561
49.8k
    if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3562
49.8k
        
!cast<PossiblyDisjointInst>(Op2)->isDisjoint()35.7k
)
3563
14.2k
      break;
3564
35.6k
    [[fallthrough]];
3565
51.5k
  case Instruction::Xor:
3566
534k
  case Instruction::Add: {
3567
534k
    Value *Other;
3568
534k
    if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3569
50.9k
      return std::make_pair(Op1->getOperand(1), Other);
3570
483k
    if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3571
184k
      return std::make_pair(Op1->getOperand(0), Other);
3572
299k
    break;
3573
483k
  }
3574
299k
  case Instruction::Sub:
3575
248k
    if (Op1->getOperand(0) == Op2->getOperand(0))
3576
3.18k
      return getOperands(1);
3577
245k
    if (Op1->getOperand(1) == Op2->getOperand(1))
3578
14.5k
      return getOperands(0);
3579
230k
    break;
3580
230k
  case Instruction::Mul: {
3581
    // invertible if A * B == (A * B) mod 2^N where A, and B are integers
3582
    // and N is the bitwdith.  The nsw case is non-obvious, but proven by
3583
    // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3584
82.0k
    auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3585
82.0k
    auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3586
82.0k
    if ((!OBO1->hasNoUnsignedWrap() || 
!OBO2->hasNoUnsignedWrap()1.99k
) &&
3587
82.0k
        
(80.3k
!OBO1->hasNoSignedWrap()80.3k
||
!OBO2->hasNoSignedWrap()29.7k
))
3588
50.8k
      break;
3589
3590
    // Assume operand order has been canonicalized
3591
31.2k
    if (Op1->getOperand(1) == Op2->getOperand(1) &&
3592
31.2k
        
isa<ConstantInt>(Op1->getOperand(1))16.2k
&&
3593
31.2k
        
!cast<ConstantInt>(Op1->getOperand(1))->isZero()406
)
3594
406
      return getOperands(0);
3595
30.8k
    break;
3596
31.2k
  }
3597
77.5k
  case Instruction::Shl: {
3598
    // Same as multiplies, with the difference that we don't need to check
3599
    // for a non-zero multiply. Shifts always multiply by non-zero.
3600
77.5k
    auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3601
77.5k
    auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3602
77.5k
    if ((!OBO1->hasNoUnsignedWrap() || 
!OBO2->hasNoUnsignedWrap()11.4k
) &&
3603
77.5k
        
(66.8k
!OBO1->hasNoSignedWrap()66.8k
||
!OBO2->hasNoSignedWrap()32.1k
))
3604
35.8k
      break;
3605
3606
41.6k
    if (Op1->getOperand(1) == Op2->getOperand(1))
3607
29.6k
      return getOperands(0);
3608
12.0k
    break;
3609
41.6k
  }
3610
85.5k
  case Instruction::AShr:
3611
204k
  case Instruction::LShr: {
3612
204k
    auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3613
204k
    auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3614
204k
    if (!PEO1->isExact() || 
!PEO2->isExact()77.3k
)
3615
127k
      break;
3616
3617
76.7k
    if (Op1->getOperand(1) == Op2->getOperand(1))
3618
72.2k
      return getOperands(0);
3619
4.50k
    break;
3620
76.7k
  }
3621
85.9k
  case Instruction::SExt:
3622
317k
  case Instruction::ZExt:
3623
317k
    if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3624
309k
      return getOperands(0);
3625
8.67k
    break;
3626
3.20M
  case Instruction::PHI: {
3627
3.20M
    const PHINode *PN1 = cast<PHINode>(Op1);
3628
3.20M
    const PHINode *PN2 = cast<PHINode>(Op2);
3629
3630
    // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3631
    // are a single invertible function of the start values? Note that repeated
3632
    // application of an invertible function is also invertible
3633
3.20M
    BinaryOperator *BO1 = nullptr;
3634
3.20M
    Value *Start1 = nullptr, *Step1 = nullptr;
3635
3.20M
    BinaryOperator *BO2 = nullptr;
3636
3.20M
    Value *Start2 = nullptr, *Step2 = nullptr;
3637
3.20M
    if (PN1->getParent() != PN2->getParent() ||
3638
3.20M
        
!matchSimpleRecurrence(PN1, BO1, Start1, Step1)2.36M
||
3639
3.20M
        
!matchSimpleRecurrence(PN2, BO2, Start2, Step2)23.1k
)
3640
3.19M
      break;
3641
3642
10.3k
    auto Values = getInvertibleOperands(cast<Operator>(BO1),
3643
10.3k
                                        cast<Operator>(BO2));
3644
10.3k
    if (!Values)
3645
4.62k
       break;
3646
3647
    // We have to be careful of mutually defined recurrences here.  Ex:
3648
    // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3649
    // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3650
    // The invertibility of these is complicated, and not worth reasoning
3651
    // about (yet?).
3652
5.72k
    if (Values->first != PN1 || Values->second != PN2)
3653
0
      break;
3654
3655
5.72k
    return std::make_pair(Start1, Start2);
3656
5.72k
  }
3657
43.2M
  }
3658
42.6M
  return std::nullopt;
3659
43.2M
}
3660
3661
/// Return true if V1 == (binop V2, X), where X is known non-zero.
3662
/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3663
/// implies V2 != V1.
3664
static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3665
                                      const APInt &DemandedElts,
3666
410M
                                      const SimplifyQuery &Q, unsigned Depth) {
3667
410M
  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3668
410M
  if (!BO)
3669
378M
    return false;
3670
32.0M
  switch (BO->getOpcode()) {
3671
15.8M
  default:
3672
15.8M
    break;
3673
15.8M
  case Instruction::Or:
3674
753k
    if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3675
238k
      break;
3676
515k
    [[fallthrough]];
3677
656k
  case Instruction::Xor:
3678
15.9M
  case Instruction::Add:
3679
15.9M
    Value *Op = nullptr;
3680
15.9M
    if (V2 == BO->getOperand(0))
3681
66.8k
      Op = BO->getOperand(1);
3682
15.8M
    else if (V2 == BO->getOperand(1))
3683
170k
      Op = BO->getOperand(0);
3684
15.7M
    else
3685
15.7M
      return false;
3686
236k
    return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3687
32.0M
  }
3688
16.0M
  return false;
3689
32.0M
}
3690
3691
/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3692
/// the multiplication is nuw or nsw.
3693
static bool isNonEqualMul(const Value *V1, const Value *V2,
3694
                          const APInt &DemandedElts, const SimplifyQuery &Q,
3695
410M
                          unsigned Depth) {
3696
410M
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3697
20.0M
    const APInt *C;
3698
20.0M
    return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3699
20.0M
           
(3.10k
OBO->hasNoUnsignedWrap()3.10k
||
OBO->hasNoSignedWrap()2.70k
) &&
3700
20.0M
           
!C->isZero()1.19k
&&
!C->isOne()1.19k
&&
3701
20.0M
           
isKnownNonZero(V1, DemandedElts, Q, Depth + 1)1.19k
;
3702
20.0M
  }
3703
390M
  return false;
3704
410M
}
3705
3706
/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3707
/// the shift is nuw or nsw.
3708
static bool isNonEqualShl(const Value *V1, const Value *V2,
3709
                          const APInt &DemandedElts, const SimplifyQuery &Q,
3710
410M
                          unsigned Depth) {
3711
410M
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3712
20.0M
    const APInt *C;
3713
20.0M
    return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3714
20.0M
           
(4.22k
OBO->hasNoUnsignedWrap()4.22k
||
OBO->hasNoSignedWrap()2.58k
) &&
3715
20.0M
           
!C->isZero()2.70k
&&
isKnownNonZero(V1, DemandedElts, Q, Depth + 1)2.70k
;
3716
20.0M
  }
3717
390M
  return false;
3718
410M
}
3719
3720
static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3721
                           const APInt &DemandedElts, const SimplifyQuery &Q,
3722
3.19M
                           unsigned Depth) {
3723
  // Check two PHIs are in same block.
3724
3.19M
  if (PN1->getParent() != PN2->getParent())
3725
836k
    return false;
3726
3727
2.36M
  SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3728
2.36M
  bool UsedFullRecursion = false;
3729
2.38M
  for (const BasicBlock *IncomBB : PN1->blocks()) {
3730
2.38M
    if (!VisitedBBs.insert(IncomBB).second)
3731
132
      continue; // Don't reprocess blocks that we have dealt with already.
3732
2.38M
    const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3733
2.38M
    const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3734
2.38M
    const APInt *C1, *C2;
3735
2.38M
    if (match(IV1, m_APInt(C1)) && 
match(IV2, m_APInt(C2))47.9k
&&
*C1 != *C234.2k
)
3736
17.7k
      continue;
3737
3738
    // Only one pair of phi operands is allowed for full recursion.
3739
2.36M
    if (UsedFullRecursion)
3740
9.54k
      return false;
3741
3742
2.35M
    SimplifyQuery RecQ = Q.getWithoutCondContext();
3743
2.35M
    RecQ.CxtI = IncomBB->getTerminator();
3744
2.35M
    if (!isKnownNonEqual(IV1, IV2, DemandedElts, RecQ, Depth + 1))
3745
2.34M
      return false;
3746
13.0k
    UsedFullRecursion = true;
3747
13.0k
  }
3748
5.04k
  return true;
3749
2.36M
}
3750
3751
static bool isNonEqualSelect(const Value *V1, const Value *V2,
3752
                             const APInt &DemandedElts, const SimplifyQuery &Q,
3753
408M
                             unsigned Depth) {
3754
408M
  const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3755
408M
  if (!SI1)
3756
403M
    return false;
3757
3758
4.36M
  if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3759
553k
    const Value *Cond1 = SI1->getCondition();
3760
553k
    const Value *Cond2 = SI2->getCondition();
3761
553k
    if (Cond1 == Cond2)
3762
126k
      return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3763
126k
                             DemandedElts, Q, Depth + 1) &&
3764
126k
             isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3765
10.9k
                             DemandedElts, Q, Depth + 1);
3766
553k
  }
3767
4.23M
  return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Q, Depth + 1) &&
3768
4.23M
         
isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Q, Depth + 1)474k
;
3769
4.36M
}
3770
3771
// Check to see if A is both a GEP and is the incoming value for a PHI in the
3772
// loop, and B is either a ptr or another GEP. If the PHI has 2 incoming values,
3773
// one of them being the recursive GEP A and the other a ptr at same base and at
3774
// the same/higher offset than B we are only incrementing the pointer further in
3775
// loop if offset of recursive GEP is greater than 0.
3776
static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3777
408M
                                               const SimplifyQuery &Q) {
3778
408M
  if (!A->getType()->isPointerTy() || 
!B->getType()->isPointerTy()180M
)
3779
227M
    return false;
3780
3781
180M
  auto *GEPA = dyn_cast<GEPOperator>(A);
3782
180M
  if (!GEPA || 
GEPA->getNumIndices() != 159.1M
||
!isa<Constant>(GEPA->idx_begin())59.1M
)
3783
131M
    return false;
3784
3785
  // Handle 2 incoming PHI values with one being a recursive GEP.
3786
48.9M
  auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3787
48.9M
  if (!PN || 
PN->getNumIncomingValues() != 219.1M
)
3788
30.0M
    return false;
3789
3790
  // Search for the recursive GEP as an incoming operand, and record that as
3791
  // Step.
3792
18.9M
  Value *Start = nullptr;
3793
18.9M
  Value *Step = const_cast<Value *>(A);
3794
18.9M
  if (PN->getIncomingValue(0) == Step)
3795
12.7M
    Start = PN->getIncomingValue(1);
3796
6.22M
  else if (PN->getIncomingValue(1) == Step)
3797
2.21M
    Start = PN->getIncomingValue(0);
3798
4.01M
  else
3799
4.01M
    return false;
3800
3801
  // Other incoming node base should match the B base.
3802
  // StartOffset >= OffsetB && StepOffset > 0?
3803
  // StartOffset <= OffsetB && StepOffset < 0?
3804
  // Is non-equal if above are true.
3805
  // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3806
  // optimisation to inbounds GEPs only.
3807
14.9M
  unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3808
14.9M
  APInt StartOffset(IndexWidth, 0);
3809
14.9M
  Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3810
14.9M
  APInt StepOffset(IndexWidth, 0);
3811
14.9M
  Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3812
3813
  // Check if Base Pointer of Step matches the PHI.
3814
14.9M
  if (Step != PN)
3815
74.4k
    return false;
3816
14.8M
  APInt OffsetB(IndexWidth, 0);
3817
14.8M
  B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3818
14.8M
  return Start == B &&
3819
14.8M
         
(998k
(998k
StartOffset.sge(OffsetB)998k
&&
StepOffset.isStrictlyPositive()892k
) ||
3820
998k
          
(824k
StartOffset.sle(OffsetB)824k
&&
StepOffset.isNegative()121k
));
3821
14.9M
}
3822
3823
static bool isKnownNonEqualFromContext(const Value *V1, const Value *V2,
3824
199M
                                       const SimplifyQuery &Q, unsigned Depth) {
3825
199M
  if (!Q.CxtI)
3826
17.9M
    return false;
3827
3828
  // Try to infer NonEqual based on information from dominating conditions.
3829
181M
  if (Q.DC && 
Q.DT94.6M
) {
3830
189M
    auto IsKnownNonEqualFromDominatingCondition = [&](const Value *V) {
3831
189M
      for (BranchInst *BI : Q.DC->conditionsFor(V)) {
3832
29.3M
        Value *Cond = BI->getCondition();
3833
29.3M
        BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
3834
29.3M
        if (Q.DT->dominates(Edge0, Q.CxtI->getParent()) &&
3835
29.3M
            isImpliedCondition(Cond, ICmpInst::ICMP_NE, V1, V2, Q.DL,
3836
2.32M
                               /*LHSIsTrue=*/true, Depth)
3837
2.32M
                .value_or(false))
3838
8.18k
          return true;
3839
3840
29.3M
        BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
3841
29.3M
        if (Q.DT->dominates(Edge1, Q.CxtI->getParent()) &&
3842
29.3M
            isImpliedCondition(Cond, ICmpInst::ICMP_NE, V1, V2, Q.DL,
3843
9.22M
                               /*LHSIsTrue=*/false, Depth)
3844
9.22M
                .value_or(false))
3845
354k
          return true;
3846
29.3M
      }
3847
3848
188M
      return false;
3849
189M
    };
3850
3851
94.6M
    if (IsKnownNonEqualFromDominatingCondition(V1) ||
3852
94.6M
        
IsKnownNonEqualFromDominatingCondition(V2)94.3M
)
3853
362k
      return true;
3854
94.6M
  }
3855
3856
181M
  if (!Q.AC)
3857
15.9M
    return false;
3858
3859
  // Try to infer NonEqual based on information from assumptions.
3860
165M
  for (auto &AssumeVH : Q.AC->assumptionsFor(V1)) {
3861
2.94M
    if (!AssumeVH)
3862
493k
      continue;
3863
2.44M
    CallInst *I = cast<CallInst>(AssumeVH);
3864
3865
2.44M
    assert(I->getFunction() == Q.CxtI->getFunction() &&
3866
2.44M
           "Got assumption for the wrong function!");
3867
2.44M
    assert(I->getIntrinsicID() == Intrinsic::assume &&
3868
2.44M
           "must be an assume intrinsic");
3869
3870
2.44M
    if (isImpliedCondition(I->getArgOperand(0), ICmpInst::ICMP_NE, V1, V2, Q.DL,
3871
2.44M
                           /*LHSIsTrue=*/true, Depth)
3872
2.44M
            .value_or(false) &&
3873
2.44M
        
isValidAssumeForContext(I, Q.CxtI, Q.DT)1.06M
)
3874
23.4k
      return true;
3875
2.44M
  }
3876
3877
165M
  return false;
3878
165M
}
3879
3880
/// Return true if it is known that V1 != V2.
3881
static bool isKnownNonEqual(const Value *V1, const Value *V2,
3882
                            const APInt &DemandedElts, const SimplifyQuery &Q,
3883
207M
                            unsigned Depth) {
3884
207M
  if (V1 == V2)
3885
756k
    return false;
3886
207M
  if (V1->getType() != V2->getType())
3887
    // We can't look through casts yet.
3888
0
    return false;
3889
3890
207M
  if (Depth >= MaxAnalysisRecursionDepth)
3891
1.07M
    return false;
3892
3893
  // See if we can recurse through (exactly one of) our operands.  This
3894
  // requires our operation be 1-to-1 and map every input value to exactly
3895
  // one output value.  Such an operation is invertible.
3896
205M
  auto *O1 = dyn_cast<Operator>(V1);
3897
205M
  auto *O2 = dyn_cast<Operator>(V2);
3898
205M
  if (O1 && 
O2193M
&&
O1->getOpcode() == O2->getOpcode()112M
) {
3899
43.2M
    if (auto Values = getInvertibleOperands(O1, O2))
3900
664k
      return isKnownNonEqual(Values->first, Values->second, DemandedElts, Q,
3901
664k
                             Depth + 1);
3902
3903
42.5M
    if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3904
3.19M
      const PHINode *PN2 = cast<PHINode>(V2);
3905
      // FIXME: This is missing a generalization to handle the case where one is
3906
      // a PHI and another one isn't.
3907
3.19M
      if (isNonEqualPHIs(PN1, PN2, DemandedElts, Q, Depth))
3908
5.04k
        return true;
3909
42.5M
    };
3910
42.5M
  }
3911
3912
205M
  if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Q, Depth) ||
3913
205M
      
isModifyingBinopOfNonZero(V2, V1, DemandedElts, Q, Depth)205M
)
3914
73.4k
    return true;
3915
3916
205M
  if (isNonEqualMul(V1, V2, DemandedElts, Q, Depth) ||
3917
205M
      
isNonEqualMul(V2, V1, DemandedElts, Q, Depth)205M
)
3918
529
    return true;
3919
3920
205M
  if (isNonEqualShl(V1, V2, DemandedElts, Q, Depth) ||
3921
205M
      
isNonEqualShl(V2, V1, DemandedElts, Q, Depth)205M
)
3922
1.52k
    return true;
3923
3924
205M
  if (V1->getType()->isIntOrIntVectorTy()) {
3925
    // Are any known bits in V1 contradictory to known bits in V2? If V1
3926
    // has a known zero where V2 has a known one, they must not be equal.
3927
114M
    KnownBits Known1 = computeKnownBits(V1, DemandedElts, Q, Depth);
3928
114M
    if (!Known1.isUnknown()) {
3929
28.5M
      KnownBits Known2 = computeKnownBits(V2, DemandedElts, Q, Depth);
3930
28.5M
      if (Known1.Zero.intersects(Known2.One) ||
3931
28.5M
          
Known2.Zero.intersects(Known1.One)27.6M
)
3932
1.00M
        return true;
3933
28.5M
    }
3934
114M
  }
3935
3936
204M
  if (isNonEqualSelect(V1, V2, DemandedElts, Q, Depth) ||
3937
204M
      
isNonEqualSelect(V2, V1, DemandedElts, Q, Depth)204M
)
3938
18.4k
    return true;
3939
3940
204M
  if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3941
204M
      
isNonEqualPointersWithRecursiveGEP(V2, V1, Q)203M
)
3942
189k
    return true;
3943
3944
203M
  Value *A, *B;
3945
  // PtrToInts are NonEqual if their Ptrs are NonEqual.
3946
  // Check PtrToInt type matches the pointer size.
3947
203M
  if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3948
203M
      
match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B)))4.55M
)
3949
4.48M
    return isKnownNonEqual(A, B, DemandedElts, Q, Depth + 1);
3950
3951
199M
  if (isKnownNonEqualFromContext(V1, V2, Q, Depth))
3952
386k
    return true;
3953
3954
199M
  return false;
3955
199M
}
3956
3957
/// For vector constants, loop over the elements and find the constant with the
3958
/// minimum number of sign bits. Return 0 if the value is not a vector constant
3959
/// or if any element was not analyzed; otherwise, return the count for the
3960
/// element with the minimum number of sign bits.
3961
static unsigned computeNumSignBitsVectorConstant(const Value *V,
3962
                                                 const APInt &DemandedElts,
3963
265M
                                                 unsigned TyBits) {
3964
265M
  const auto *CV = dyn_cast<Constant>(V);
3965
265M
  if (!CV || 
!isa<FixedVectorType>(CV->getType())26.8M
)
3966
265M
    return 0;
3967
3968
17.1k
  unsigned MinSignBits = TyBits;
3969
17.1k
  unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3970
151k
  for (unsigned i = 0; i != NumElts; 
++i134k
) {
3971
134k
    if (!DemandedElts[i])
3972
11.8k
      continue;
3973
    // If we find a non-ConstantInt, bail out.
3974
123k
    auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3975
123k
    if (!Elt)
3976
200
      return 0;
3977
3978
122k
    MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3979
122k
  }
3980
3981
16.9k
  return MinSignBits;
3982
17.1k
}
3983
3984
static unsigned ComputeNumSignBitsImpl(const Value *V,
3985
                                       const APInt &DemandedElts,
3986
                                       const SimplifyQuery &Q, unsigned Depth);
3987
3988
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3989
368M
                                   const SimplifyQuery &Q, unsigned Depth) {
3990
368M
  unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Q, Depth);
3991
368M
  assert(Result > 0 && "At least one sign bit needs to be present!");
3992
368M
  return Result;
3993
368M
}
3994
3995
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign bit
/// (itself), but other cases can give us information. For example, immediately
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits of the demanded
/// elements in the vector specified by DemandedElts.
static unsigned ComputeNumSignBitsImpl(const Value *V,
                                       const APInt &DemandedElts,
                                       const SimplifyQuery &Q, unsigned Depth) {
  Type *Ty = V->getType();
#ifndef NDEBUG
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
    assert(
        FVTy->getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars");
  }
#endif

  // We return the minimum number of sign bits that are guaranteed to be present
  // in V, so for undef we have to conservatively return 1.  We don't have the
  // same behavior for poison though -- that's a FIXME today.

  Type *ScalarTy = Ty->getScalarType();
  unsigned TyBits = ScalarTy->isPointerTy() ?
    Q.DL.getPointerTypeSizeInBits(ScalarTy) :
    Q.DL.getTypeSizeInBits(ScalarTy);

  // Tmp/Tmp2 are scratch sign-bit counts reused across the switch cases below.
  unsigned Tmp, Tmp2;
  unsigned FirstAnswer = 1;

  // Note that ConstantInt is handled by the general computeKnownBits case
  // below.

  if (Depth == MaxAnalysisRecursionDepth)
    return 1;

  if (auto *U = dyn_cast<Operator>(V)) {
    switch (Operator::getOpcode(V)) {
    default: break;
    case Instruction::BitCast: {
      Value *Src = U->getOperand(0);
      Type *SrcTy = Src->getType();

      // Skip if the source type is not an integer or integer vector type
      // This ensures we only process integer-like types
      if (!SrcTy->isIntOrIntVectorTy())
        break;

      unsigned SrcBits = SrcTy->getScalarSizeInBits();

      // Bitcast 'large element' scalar/vector to 'small element' vector.
      if ((SrcBits % TyBits) != 0)
        break;

      // Only proceed if the destination type is a fixed-size vector
      if (isa<FixedVectorType>(Ty)) {
        // Fast case - sign splat can be simply split across the small elements.
        // This works for both vector and scalar sources
        Tmp = ComputeNumSignBits(Src, Q, Depth + 1);
        if (Tmp == SrcBits)
          return TyBits;
      }
      break;
    }
    case Instruction::SExt:
      // The extended bits all copy the source's sign bit.
      Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
      return ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1) +
             Tmp;

    case Instruction::SDiv: {
      const APInt *Denominator;
      // sdiv X, C -> adds log(C) sign bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (!Denominator->isStrictlyPositive())
          break;

        // Calculate the incoming numerator bits.
        unsigned NumBits =
            ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);

        // Add floor(log(C)) bits to the numerator bits.
        return std::min(TyBits, NumBits + Denominator->logBase2());
      }
      break;
    }

    case Instruction::SRem: {
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);

      const APInt *Denominator;
      // srem X, C -> we know that the result is within [-C+1,C) when C is a
      // positive constant.  This let us put a lower bound on the number of sign
      // bits.
      if (match(U->getOperand(1), m_APInt(Denominator))) {

        // Ignore non-positive denominator.
        if (Denominator->isStrictlyPositive()) {
          // Calculate the leading sign bit constraints by examining the
          // denominator.  Given that the denominator is positive, there are two
          // cases:
          //
          //  1. The numerator is positive. The result range is [0,C) and
          //     [0,C) u< (1 << ceilLogBase2(C)).
          //
          //  2. The numerator is negative. Then the result range is (-C,0] and
          //     integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
          //
          // Thus a lower bound on the number of sign bits is `TyBits -
          // ceilLogBase2(C)`.

          unsigned ResBits = TyBits - Denominator->ceilLogBase2();
          Tmp = std::max(Tmp, ResBits);
        }
      }
      return Tmp;
    }

    case Instruction::AShr: {
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
      // ashr X, C   -> adds C sign bits.  Vectors too.
      const APInt *ShAmt;
      if (match(U->getOperand(1), m_APInt(ShAmt))) {
        if (ShAmt->uge(TyBits))
          break; // Bad shift.
        unsigned ShAmtLimited = ShAmt->getZExtValue();
        Tmp += ShAmtLimited;
        // Clamp: the count can never exceed the bit width.
        if (Tmp > TyBits) Tmp = TyBits;
      }
      return Tmp;
    }
    case Instruction::Shl: {
      const APInt *ShAmt;
      Value *X = nullptr;
      if (match(U->getOperand(1), m_APInt(ShAmt))) {
        // shl destroys sign bits.
        if (ShAmt->uge(TyBits))
          break; // Bad shift.
        // We can look through a zext (more or less treating it as a sext) if
        // all extended bits are shifted out.
        if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
            ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
          Tmp = ComputeNumSignBits(X, DemandedElts, Q, Depth + 1);
          Tmp += TyBits - X->getType()->getScalarSizeInBits();
        } else
          Tmp =
              ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
        if (ShAmt->uge(Tmp))
          break; // Shifted all sign bits out.
        Tmp2 = ShAmt->getZExtValue();
        return Tmp - Tmp2;
      }
      break;
    }
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: // NOT is handled here.
      // Logical binary ops preserve the number of sign bits at the worst.
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
      if (Tmp != 1) {
        Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Q, Depth + 1);
        FirstAnswer = std::min(Tmp, Tmp2);
        // We computed what we know about the sign bits as our first
        // answer. Now proceed to the generic code that uses
        // computeKnownBits, and pick whichever answer is better.
      }
      break;

    case Instruction::Select: {
      // If we have a clamp pattern, we know that the number of sign bits will
      // be the minimum of the clamp min/max range.
      const Value *X;
      const APInt *CLow, *CHigh;
      if (isSignedMinMaxClamp(U, X, CLow, CHigh))
        return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());

      // Otherwise the result has the minimum sign bits of either arm.
      Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Q, Depth + 1);
      if (Tmp == 1)
        break;
      Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Q, Depth + 1);
      return std::min(Tmp, Tmp2);
    }

    case Instruction::Add:
      // Add can have at most one carry bit.  Thus we know that the output
      // is, at worst, one more bit than the inputs.
      Tmp = ComputeNumSignBits(U->getOperand(0), Q, Depth + 1);
      if (Tmp == 1) break;

      // Special case decrementing a value (ADD X, -1):
      if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
        if (CRHS->isAllOnesValue()) {
          KnownBits Known(TyBits);
          computeKnownBits(U->getOperand(0), DemandedElts, Known, Q, Depth + 1);

          // If the input is known to be 0 or 1, the output is 0/-1, which is
          // all sign bits set.
          if ((Known.Zero | 1).isAllOnes())
            return TyBits;

          // If we are subtracting one from a positive number, there is no carry
          // out of the result.
          if (Known.isNonNegative())
            return Tmp;
        }

      Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Q, Depth + 1);
      if (Tmp2 == 1)
        break;
      return std::min(Tmp, Tmp2) - 1;

    case Instruction::Sub:
      Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Q, Depth + 1);
      if (Tmp2 == 1)
        break;

      // Handle NEG.
      if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
        if (CLHS->isNullValue()) {
          KnownBits Known(TyBits);
          computeKnownBits(U->getOperand(1), DemandedElts, Known, Q, Depth + 1);
          // If the input is known to be 0 or 1, the output is 0/-1, which is
          // all sign bits set.
          if ((Known.Zero | 1).isAllOnes())
            return TyBits;

          // If the input is known to be positive (the sign bit is known clear),
          // the output of the NEG has the same number of sign bits as the
          // input.
          if (Known.isNonNegative())
            return Tmp2;

          // Otherwise, we treat this like a SUB.
        }

      // Sub can have at most one carry bit.  Thus we know that the output
      // is, at worst, one more bit than the inputs.
      Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
      if (Tmp == 1)
        break;
      return std::min(Tmp, Tmp2) - 1;

    case Instruction::Mul: {
      // The output of the Mul can be at most twice the valid bits in the
      // inputs.
      unsigned SignBitsOp0 =
          ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
      if (SignBitsOp0 == 1)
        break;
      unsigned SignBitsOp1 =
          ComputeNumSignBits(U->getOperand(1), DemandedElts, Q, Depth + 1);
      if (SignBitsOp1 == 1)
        break;
      // (TyBits - SignBits + 1) is the number of "value" bits each operand
      // contributes; their sum bounds the value bits of the product.
      unsigned OutValidBits =
          (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
      return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
    }

    case Instruction::PHI: {
      const PHINode *PN = cast<PHINode>(U);
      unsigned NumIncomingValues = PN->getNumIncomingValues();
      // Don't analyze large in-degree PHIs.
      if (NumIncomingValues > 4) break;
      // Unreachable blocks may have zero-operand PHI nodes.
      if (NumIncomingValues == 0) break;

      // Take the minimum of all incoming values.  This can't infinitely loop
      // because of our depth threshold.
      SimplifyQuery RecQ = Q.getWithoutCondContext();
      Tmp = TyBits;
      for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
        if (Tmp == 1) return Tmp;
        // Query each incoming value in the context of its predecessor block.
        RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
        Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
                                               DemandedElts, RecQ, Depth + 1));
      }
      return Tmp;
    }

    case Instruction::Trunc: {
      // If the input contained enough sign bits that some remain after the
      // truncation, then we can make use of that. Otherwise we don't know
      // anything.
      Tmp = ComputeNumSignBits(U->getOperand(0), Q, Depth + 1);
      unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
      if (Tmp > (OperandTyBits - TyBits))
        return Tmp - (OperandTyBits - TyBits);

      return 1;
    }

    case Instruction::ExtractElement:
      // Look through extract element. At the moment we keep this simple and
      // skip tracking the specific element. But at least we might find
      // information valid for all elements of the vector (for example if vector
      // is sign extended, shifted, etc).
      return ComputeNumSignBits(U->getOperand(0), Q, Depth + 1);

    case Instruction::ShuffleVector: {
      // Collect the minimum number of sign bits that are shared by every vector
      // element referenced by the shuffle.
      auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuf) {
        // FIXME: Add support for shufflevector constant expressions.
        return 1;
      }
      APInt DemandedLHS, DemandedRHS;
      // For undef elements, we don't know anything about the common state of
      // the shuffle result.
      if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
        return 1;
      Tmp = std::numeric_limits<unsigned>::max();
      if (!!DemandedLHS) {
        const Value *LHS = Shuf->getOperand(0);
        Tmp = ComputeNumSignBits(LHS, DemandedLHS, Q, Depth + 1);
      }
      // If we don't know anything, early out and try computeKnownBits
      // fall-back.
      if (Tmp == 1)
        break;
      if (!!DemandedRHS) {
        const Value *RHS = Shuf->getOperand(1);
        Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Q, Depth + 1);
        Tmp = std::min(Tmp, Tmp2);
      }
      // If we don't know anything, early out and try computeKnownBits
      // fall-back.
      if (Tmp == 1)
        break;
      assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
      return Tmp;
    }
    case Instruction::Call: {
      if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
        switch (II->getIntrinsicID()) {
        default:
          break;
        case Intrinsic::abs:
          Tmp =
              ComputeNumSignBits(U->getOperand(0), DemandedElts, Q, Depth + 1);
          if (Tmp == 1)
            break;

          // Absolute value reduces number of sign bits by at most 1.
          return Tmp - 1;
        case Intrinsic::smin:
        case Intrinsic::smax: {
          const APInt *CLow, *CHigh;
          if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
            return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
        }
        }
      }
      // Unhandled calls fall through to the generic computeKnownBits path.
    }
    }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.

  // If we can examine all elements of a vector constant successfully, we're
  // done (we can't do any better than that). If not, keep trying.
  if (unsigned VecSignBits =
          computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
    return VecSignBits;

  KnownBits Known(TyBits);
  computeKnownBits(V, DemandedElts, Known, Q, Depth);

  // If we know that the sign bit is either zero or one, determine the number of
  // identical bits in the top of the input value.
  return std::max(FirstAnswer, Known.countMinSignBits());
}
4375
4376
/// Map a call site to the LLVM intrinsic that models it: the callee's own
/// intrinsic ID when it is an intrinsic, otherwise an intrinsic inferred from
/// a recognized math library function. The library-function mapping is only
/// applied when TLI recognizes the callee and the call only reads memory;
/// otherwise Intrinsic::not_intrinsic is returned.
Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
                                            const TargetLibraryInfo *TLI) {
  const Function *F = CB.getCalledFunction();
  if (!F)
    return Intrinsic::not_intrinsic;

  if (F->isIntrinsic())
    return F->getIntrinsicID();

  // We are going to infer semantics of a library function based on mapping it
  // to an LLVM intrinsic. Check that the library function is available from
  // this callbase and in this environment.
  LibFunc Func;
  if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
      !CB.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  // Each libm function maps together with its float ('f' suffix) and long
  // double ('l' suffix) variants to a single type-overloaded intrinsic.
  switch (Func) {
  default:
    break;
  case LibFunc_sin:
  case LibFunc_sinf:
  case LibFunc_sinl:
    return Intrinsic::sin;
  case LibFunc_cos:
  case LibFunc_cosf:
  case LibFunc_cosl:
    return Intrinsic::cos;
  case LibFunc_tan:
  case LibFunc_tanf:
  case LibFunc_tanl:
    return Intrinsic::tan;
  case LibFunc_asin:
  case LibFunc_asinf:
  case LibFunc_asinl:
    return Intrinsic::asin;
  case LibFunc_acos:
  case LibFunc_acosf:
  case LibFunc_acosl:
    return Intrinsic::acos;
  case LibFunc_atan:
  case LibFunc_atanf:
  case LibFunc_atanl:
    return Intrinsic::atan;
  case LibFunc_atan2:
  case LibFunc_atan2f:
  case LibFunc_atan2l:
    return Intrinsic::atan2;
  case LibFunc_sinh:
  case LibFunc_sinhf:
  case LibFunc_sinhl:
    return Intrinsic::sinh;
  case LibFunc_cosh:
  case LibFunc_coshf:
  case LibFunc_coshl:
    return Intrinsic::cosh;
  case LibFunc_tanh:
  case LibFunc_tanhf:
  case LibFunc_tanhl:
    return Intrinsic::tanh;
  case LibFunc_exp:
  case LibFunc_expf:
  case LibFunc_expl:
    return Intrinsic::exp;
  case LibFunc_exp2:
  case LibFunc_exp2f:
  case LibFunc_exp2l:
    return Intrinsic::exp2;
  case LibFunc_exp10:
  case LibFunc_exp10f:
  case LibFunc_exp10l:
    return Intrinsic::exp10;
  case LibFunc_log:
  case LibFunc_logf:
  case LibFunc_logl:
    return Intrinsic::log;
  case LibFunc_log10:
  case LibFunc_log10f:
  case LibFunc_log10l:
    return Intrinsic::log10;
  case LibFunc_log2:
  case LibFunc_log2f:
  case LibFunc_log2l:
    return Intrinsic::log2;
  case LibFunc_fabs:
  case LibFunc_fabsf:
  case LibFunc_fabsl:
    return Intrinsic::fabs;
  case LibFunc_fmin:
  case LibFunc_fminf:
  case LibFunc_fminl:
    return Intrinsic::minnum;
  case LibFunc_fmax:
  case LibFunc_fmaxf:
  case LibFunc_fmaxl:
    return Intrinsic::maxnum;
  case LibFunc_copysign:
  case LibFunc_copysignf:
  case LibFunc_copysignl:
    return Intrinsic::copysign;
  case LibFunc_floor:
  case LibFunc_floorf:
  case LibFunc_floorl:
    return Intrinsic::floor;
  case LibFunc_ceil:
  case LibFunc_ceilf:
  case LibFunc_ceill:
    return Intrinsic::ceil;
  case LibFunc_trunc:
  case LibFunc_truncf:
  case LibFunc_truncl:
    return Intrinsic::trunc;
  case LibFunc_rint:
  case LibFunc_rintf:
  case LibFunc_rintl:
    return Intrinsic::rint;
  case LibFunc_nearbyint:
  case LibFunc_nearbyintf:
  case LibFunc_nearbyintl:
    return Intrinsic::nearbyint;
  case LibFunc_round:
  case LibFunc_roundf:
  case LibFunc_roundl:
    return Intrinsic::round;
  case LibFunc_roundeven:
  case LibFunc_roundevenf:
  case LibFunc_roundevenl:
    return Intrinsic::roundeven;
  case LibFunc_pow:
  case LibFunc_powf:
  case LibFunc_powl:
    return Intrinsic::pow;
  case LibFunc_sqrt:
  case LibFunc_sqrtf:
  case LibFunc_sqrtl:
    return Intrinsic::sqrt;
  }

  return Intrinsic::not_intrinsic;
}
4516
4517
665k
static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4518
665k
  Ty = Ty->getScalarType();
4519
665k
  DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4520
665k
  return Mode.Output == DenormalMode::IEEE ||
4521
665k
         
Mode.Output == DenormalMode::PositiveZero0
;
4522
665k
}
4523
/// Given an exploded icmp instruction, return true if the comparison only
4524
/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
4525
/// the result of the comparison is true when the input value is signed.
4526
bool llvm::isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4527
169M
                          bool &TrueIfSigned) {
4528
169M
  switch (Pred) {
4529
5.93M
  case ICmpInst::ICMP_SLT: // True if LHS s< 0
4530
5.93M
    TrueIfSigned = true;
4531
5.93M
    return RHS.isZero();
4532
19
  case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4533
19
    TrueIfSigned = true;
4534
19
    return RHS.isAllOnes();
4535
15.7M
  case ICmpInst::ICMP_SGT: // True if LHS s> -1
4536
15.7M
    TrueIfSigned = false;
4537
15.7M
    return RHS.isAllOnes();
4538
12
  case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4539
12
    TrueIfSigned = false;
4540
12
    return RHS.isZero();
4541
9.36M
  case ICmpInst::ICMP_UGT:
4542
    // True if LHS u> RHS and RHS == sign-bit-mask - 1
4543
9.36M
    TrueIfSigned = true;
4544
9.36M
    return RHS.isMaxSignedValue();
4545
49
  case ICmpInst::ICMP_UGE:
4546
    // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4547
49
    TrueIfSigned = true;
4548
49
    return RHS.isMinSignedValue();
4549
23.3M
  case ICmpInst::ICMP_ULT:
4550
    // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4551
23.3M
    TrueIfSigned = false;
4552
23.3M
    return RHS.isMinSignedValue();
4553
7
  case ICmpInst::ICMP_ULE:
4554
    // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4555
7
    TrueIfSigned = false;
4556
7
    return RHS.isMaxSignedValue();
4557
114M
  default:
4558
114M
    return false;
4559
169M
  }
4560
169M
}
4561
4562
/// Refine \p KnownFromContext for \p V using the fact that condition \p Cond
/// evaluates to \p CondIsTrue at context instruction \p CxtI. Recurses through
/// logical and/or/not up to MaxAnalysisRecursionDepth, then recognizes three
/// leaf forms: an fcmp of V against a constant, an is.fpclass test on V, and a
/// sign-bit icmp on a bitcast of V.
static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
                                        bool CondIsTrue,
                                        const Instruction *CxtI,
                                        KnownFPClass &KnownFromContext,
                                        unsigned Depth = 0) {
  Value *A, *B;
  // A true (A && B) implies both arms are true; a false (A || B) implies both
  // arms are false — either way both operands can be processed recursively.
  if (Depth < MaxAnalysisRecursionDepth &&
      (CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
                  : match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
    computeKnownFPClassFromCond(V, A, CondIsTrue, CxtI, KnownFromContext,
                                Depth + 1);
    computeKnownFPClassFromCond(V, B, CondIsTrue, CxtI, KnownFromContext,
                                Depth + 1);
    return;
  }
  // Look through a logical not by flipping CondIsTrue.
  if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A)))) {
    computeKnownFPClassFromCond(V, A, !CondIsTrue, CxtI, KnownFromContext,
                                Depth + 1);
    return;
  }
  CmpPredicate Pred;
  Value *LHS;
  uint64_t ClassVal = 0;
  const APFloat *CRHS;
  const APInt *RHS;
  if (match(Cond, m_FCmp(Pred, m_Value(LHS), m_APFloat(CRHS)))) {
    // Translate the fcmp outcome into the set of FP classes V may still be,
    // and exclude everything outside that set.
    auto [CmpVal, MaskIfTrue, MaskIfFalse] = fcmpImpliesClass(
        Pred, *CxtI->getParent()->getParent(), LHS, *CRHS, LHS != V);
    if (CmpVal == V)
      KnownFromContext.knownNot(~(CondIsTrue ? MaskIfTrue : MaskIfFalse));
  } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(
                             m_Specific(V), m_ConstantInt(ClassVal)))) {
    // is.fpclass(V, Mask): a true result excludes ~Mask, a false result
    // excludes Mask itself.
    FPClassTest Mask = static_cast<FPClassTest>(ClassVal);
    KnownFromContext.knownNot(CondIsTrue ? ~Mask : Mask);
  } else if (match(Cond, m_ICmp(Pred, m_ElementWiseBitCast(m_Specific(V)),
                                m_APInt(RHS)))) {
    // An integer sign-bit test on the bit pattern of V pins V's FP sign bit.
    bool TrueIfSigned;
    if (!isSignBitCheck(Pred, *RHS, TrueIfSigned))
      return;
    if (TrueIfSigned == CondIsTrue)
      KnownFromContext.signBitMustBeOne();
    else
      KnownFromContext.signBitMustBeZero();
  }
}
4607
4608
/// Gather FP-class facts about \p V that hold at Q.CxtI from three context
/// sources: the query's attached condition (Q.CC), branch conditions whose
/// edges dominate the context block (Q.DC/Q.DT), and llvm.assume calls valid
/// at the context (Q.AC). Each source feeds computeKnownFPClassFromCond.
static KnownFPClass computeKnownFPClassFromContext(const Value *V,
                                                   const SimplifyQuery &Q) {
  KnownFPClass KnownFromContext;

  // Condition carried directly on the query, if it affects V.
  if (Q.CC && Q.CC->AffectedValues.contains(V))
    computeKnownFPClassFromCond(V, Q.CC->Cond, !Q.CC->Invert, Q.CxtI,
                                KnownFromContext);

  // The remaining sources all need a context instruction to reason about
  // dominance/validity.
  if (!Q.CxtI)
    return KnownFromContext;

  if (Q.DC && Q.DT) {
    // Handle dominating conditions.
    for (BranchInst *BI : Q.DC->conditionsFor(V)) {
      Value *Cond = BI->getCondition();

      // If the true edge dominates the context, the condition held there.
      BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
      if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/true, Q.CxtI,
                                    KnownFromContext);

      // If the false edge dominates the context, the condition was false.
      BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
      if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
        computeKnownFPClassFromCond(V, Cond, /*CondIsTrue=*/false, Q.CxtI,
                                    KnownFromContext);
    }
  }

  if (!Q.AC)
    return KnownFromContext;

  // Try to restrict the floating-point classes based on information from
  // assumptions.
  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
    // Skip assumptions that have been erased (dangling value handles).
    if (!AssumeVH)
      continue;
    CallInst *I = cast<CallInst>(AssumeVH);

    assert(I->getFunction() == Q.CxtI->getParent()->getParent() &&
           "Got assumption for the wrong function!");
    assert(I->getIntrinsicID() == Intrinsic::assume &&
           "must be an assume intrinsic");

    // The assume must be reachable/valid at the context instruction.
    if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
      continue;

    computeKnownFPClassFromCond(V, I->getArgOperand(0),
                                /*CondIsTrue=*/true, Q.CxtI, KnownFromContext);
  }

  return KnownFromContext;
}
4660
4661
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4662
                         FPClassTest InterestedClasses, KnownFPClass &Known,
4663
                         const SimplifyQuery &Q, unsigned Depth);
4664
4665
static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
4666
                                FPClassTest InterestedClasses,
4667
4.14M
                                const SimplifyQuery &Q, unsigned Depth) {
4668
4.14M
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
4669
4.14M
  APInt DemandedElts =
4670
4.14M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())279k
:
APInt(1, 1)3.86M
;
4671
4.14M
  computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Q, Depth);
4672
4.14M
}
4673
4674
static void computeKnownFPClassForFPTrunc(const Operator *Op,
4675
                                          const APInt &DemandedElts,
4676
                                          FPClassTest InterestedClasses,
4677
                                          KnownFPClass &Known,
4678
                                          const SimplifyQuery &Q,
4679
246k
                                          unsigned Depth) {
4680
246k
  if ((InterestedClasses &
4681
246k
       (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
4682
9.17k
    return;
4683
4684
237k
  KnownFPClass KnownSrc;
4685
237k
  computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
4686
237k
                      KnownSrc, Q, Depth + 1);
4687
4688
  // Sign should be preserved
4689
  // TODO: Handle cannot be ordered greater than zero
4690
237k
  if (KnownSrc.cannotBeOrderedLessThanZero())
4691
69.7k
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
4692
4693
237k
  Known.propagateNaN(KnownSrc, true);
4694
4695
  // Infinity needs a range check.
4696
237k
}
4697
4698
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
4699
                         FPClassTest InterestedClasses, KnownFPClass &Known,
4700
25.4M
                         const SimplifyQuery &Q, unsigned Depth) {
4701
25.4M
  assert(Known.isUnknown() && "should not be called with known information");
4702
4703
25.4M
  if (!DemandedElts) {
4704
    // No demanded elts, better to assume we don't know anything.
4705
0
    Known.resetAll();
4706
0
    return;
4707
0
  }
4708
4709
25.4M
  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
4710
4711
25.4M
  if (auto *CFP = dyn_cast<ConstantFP>(V)) {
4712
2.87M
    Known.KnownFPClasses = CFP->getValueAPF().classify();
4713
2.87M
    Known.SignBit = CFP->isNegative();
4714
2.87M
    return;
4715
2.87M
  }
4716
4717
22.5M
  if (isa<ConstantAggregateZero>(V)) {
4718
36.9k
    Known.KnownFPClasses = fcPosZero;
4719
36.9k
    Known.SignBit = false;
4720
36.9k
    return;
4721
36.9k
  }
4722
4723
22.5M
  if (isa<PoisonValue>(V)) {
4724
1.43k
    Known.KnownFPClasses = fcNone;
4725
1.43k
    Known.SignBit = false;
4726
1.43k
    return;
4727
1.43k
  }
4728
4729
  // Try to handle fixed width vector constants
4730
22.5M
  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
4731
22.5M
  const Constant *CV = dyn_cast<Constant>(V);
4732
22.5M
  if (VFVTy && 
CV1.44M
) {
4733
33.2k
    Known.KnownFPClasses = fcNone;
4734
33.2k
    bool SignBitAllZero = true;
4735
33.2k
    bool SignBitAllOne = true;
4736
4737
    // For vectors, verify that each element is not NaN.
4738
33.2k
    unsigned NumElts = VFVTy->getNumElements();
4739
242k
    for (unsigned i = 0; i != NumElts; 
++i209k
) {
4740
209k
      if (!DemandedElts[i])
4741
4.87k
        continue;
4742
4743
204k
      Constant *Elt = CV->getAggregateElement(i);
4744
204k
      if (!Elt) {
4745
0
        Known = KnownFPClass();
4746
0
        return;
4747
0
      }
4748
204k
      if (isa<PoisonValue>(Elt))
4749
400
        continue;
4750
204k
      auto *CElt = dyn_cast<ConstantFP>(Elt);
4751
204k
      if (!CElt) {
4752
31
        Known = KnownFPClass();
4753
31
        return;
4754
31
      }
4755
4756
204k
      const APFloat &C = CElt->getValueAPF();
4757
204k
      Known.KnownFPClasses |= C.classify();
4758
204k
      if (C.isNegative())
4759
23.6k
        SignBitAllZero = false;
4760
180k
      else
4761
180k
        SignBitAllOne = false;
4762
204k
    }
4763
33.1k
    if (SignBitAllOne != SignBitAllZero)
4764
33.1k
      Known.SignBit = SignBitAllOne;
4765
33.1k
    return;
4766
33.2k
  }
4767
4768
22.5M
  FPClassTest KnownNotFromFlags = fcNone;
4769
22.5M
  if (const auto *CB = dyn_cast<CallBase>(V))
4770
3.07M
    KnownNotFromFlags |= CB->getRetNoFPClass();
4771
19.4M
  else if (const auto *Arg = dyn_cast<Argument>(V))
4772
2.35M
    KnownNotFromFlags |= Arg->getNoFPClass();
4773
4774
22.5M
  const Operator *Op = dyn_cast<Operator>(V);
4775
22.5M
  if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
4776
12.4M
    if (FPOp->hasNoNaNs())
4777
246k
      KnownNotFromFlags |= fcNan;
4778
12.4M
    if (FPOp->hasNoInfs())
4779
246k
      KnownNotFromFlags |= fcInf;
4780
12.4M
  }
4781
4782
22.5M
  KnownFPClass AssumedClasses = computeKnownFPClassFromContext(V, Q);
4783
22.5M
  KnownNotFromFlags |= ~AssumedClasses.KnownFPClasses;
4784
4785
  // We no longer need to find out about these bits from inputs if we can
4786
  // assume this from flags/attributes.
4787
22.5M
  InterestedClasses &= ~KnownNotFromFlags;
4788
4789
22.5M
  auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
4790
22.5M
    Known.knownNot(KnownNotFromFlags);
4791
22.5M
    if (!Known.SignBit && 
AssumedClasses.SignBit20.8M
) {
4792
88.5k
      if (*AssumedClasses.SignBit)
4793
57.3k
        Known.signBitMustBeOne();
4794
31.1k
      else
4795
31.1k
        Known.signBitMustBeZero();
4796
88.5k
    }
4797
22.5M
  });
4798
4799
22.5M
  if (!Op)
4800
2.35M
    return;
4801
4802
  // All recursive calls that increase depth must come after this.
4803
20.1M
  if (Depth == MaxAnalysisRecursionDepth)
4804
1.79M
    return;
4805
4806
18.3M
  const unsigned Opc = Op->getOpcode();
4807
18.3M
  switch (Opc) {
4808
250k
  case Instruction::FNeg: {
4809
250k
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
4810
250k
                        Known, Q, Depth + 1);
4811
250k
    Known.fneg();
4812
250k
    break;
4813
0
  }
4814
2.10M
  case Instruction::Select: {
4815
2.10M
    Value *Cond = Op->getOperand(0);
4816
2.10M
    Value *LHS = Op->getOperand(1);
4817
2.10M
    Value *RHS = Op->getOperand(2);
4818
4819
2.10M
    FPClassTest FilterLHS = fcAllFlags;
4820
2.10M
    FPClassTest FilterRHS = fcAllFlags;
4821
4822
2.10M
    Value *TestedValue = nullptr;
4823
2.10M
    FPClassTest MaskIfTrue = fcAllFlags;
4824
2.10M
    FPClassTest MaskIfFalse = fcAllFlags;
4825
2.10M
    uint64_t ClassVal = 0;
4826
2.10M
    const Function *F = cast<Instruction>(Op)->getFunction();
4827
2.10M
    CmpPredicate Pred;
4828
2.10M
    Value *CmpLHS, *CmpRHS;
4829
2.10M
    if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
4830
      // If the select filters out a value based on the class, it no longer
4831
      // participates in the class of the result
4832
4833
      // TODO: In some degenerate cases we can infer something if we try again
4834
      // without looking through sign operations.
4835
1.61M
      bool LookThroughFAbsFNeg = CmpLHS != LHS && 
CmpLHS != RHS1.31M
;
4836
1.61M
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
4837
1.61M
          fcmpImpliesClass(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
4838
1.61M
    } else 
if (482k
match(Cond,
4839
482k
                     m_Intrinsic<Intrinsic::is_fpclass>(
4840
482k
                         m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
4841
27
      FPClassTest TestedMask = static_cast<FPClassTest>(ClassVal);
4842
27
      MaskIfTrue = TestedMask;
4843
27
      MaskIfFalse = ~TestedMask;
4844
27
    }
4845
4846
2.10M
    if (TestedValue == LHS) {
4847
      // match !isnan(x) ? x : y
4848
239k
      FilterLHS = MaskIfTrue;
4849
1.86M
    } else if (TestedValue == RHS) { // && IsExactClass
4850
      // match !isnan(x) ? y : x
4851
965k
      FilterRHS = MaskIfFalse;
4852
965k
    }
4853
4854
2.10M
    KnownFPClass Known2;
4855
2.10M
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
4856
2.10M
                        Q, Depth + 1);
4857
2.10M
    Known.KnownFPClasses &= FilterLHS;
4858
4859
2.10M
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
4860
2.10M
                        Known2, Q, Depth + 1);
4861
2.10M
    Known2.KnownFPClasses &= FilterRHS;
4862
4863
2.10M
    Known |= Known2;
4864
2.10M
    break;
4865
0
  }
4866
2.70M
  case Instruction::Call: {
4867
2.70M
    const CallInst *II = cast<CallInst>(Op);
4868
2.70M
    const Intrinsic::ID IID = II->getIntrinsicID();
4869
2.70M
    switch (IID) {
4870
1.13M
    case Intrinsic::fabs: {
4871
1.13M
      if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
4872
        // If we only care about the sign bit we don't need to inspect the
4873
        // operand.
4874
1.12M
        computeKnownFPClass(II->getArgOperand(0), DemandedElts,
4875
1.12M
                            InterestedClasses, Known, Q, Depth + 1);
4876
1.12M
      }
4877
4878
1.13M
      Known.fabs();
4879
1.13M
      break;
4880
0
    }
4881
14.6k
    case Intrinsic::copysign: {
4882
14.6k
      KnownFPClass KnownSign;
4883
4884
14.6k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
4885
14.6k
                          Known, Q, Depth + 1);
4886
14.6k
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
4887
14.6k
                          KnownSign, Q, Depth + 1);
4888
14.6k
      Known.copysign(KnownSign);
4889
14.6k
      break;
4890
0
    }
4891
131k
    case Intrinsic::fma:
4892
728k
    case Intrinsic::fmuladd: {
4893
728k
      if ((InterestedClasses & fcNegative) == fcNone)
4894
21.9k
        break;
4895
4896
706k
      if (II->getArgOperand(0) != II->getArgOperand(1))
4897
587k
        break;
4898
4899
      // The multiply cannot be -0 and therefore the add can't be -0
4900
118k
      Known.knownNot(fcNegZero);
4901
4902
      // x * x + y is non-negative if y is non-negative.
4903
118k
      KnownFPClass KnownAddend;
4904
118k
      computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
4905
118k
                          KnownAddend, Q, Depth + 1);
4906
4907
118k
      if (KnownAddend.cannotBeOrderedLessThanZero())
4908
92.3k
        Known.knownNot(fcNegative);
4909
118k
      break;
4910
706k
    }
4911
48.8k
    case Intrinsic::sqrt:
4912
48.8k
    case Intrinsic::experimental_constrained_sqrt: {
4913
48.8k
      KnownFPClass KnownSrc;
4914
48.8k
      FPClassTest InterestedSrcs = InterestedClasses;
4915
48.8k
      if (InterestedClasses & fcNan)
4916
43.9k
        InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
4917
4918
48.8k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
4919
48.8k
                          KnownSrc, Q, Depth + 1);
4920
4921
48.8k
      if (KnownSrc.isKnownNeverPosInfinity())
4922
602
        Known.knownNot(fcPosInf);
4923
48.8k
      if (KnownSrc.isKnownNever(fcSNan))
4924
3.49k
        Known.knownNot(fcSNan);
4925
4926
      // Any negative value besides -0 returns a nan.
4927
48.8k
      if (KnownSrc.isKnownNeverNaN() && 
KnownSrc.cannotBeOrderedLessThanZero()3.49k
)
4928
3.03k
        Known.knownNot(fcNan);
4929
4930
      // The only negative value that can be returned is -0 for -0 inputs.
4931
48.8k
      Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
4932
4933
      // If the input denormal mode could be PreserveSign, a negative
4934
      // subnormal input could produce a negative zero output.
4935
48.8k
      const Function *F = II->getFunction();
4936
48.8k
      const fltSemantics &FltSem =
4937
48.8k
          II->getType()->getScalarType()->getFltSemantics();
4938
4939
48.8k
      if (Q.IIQ.hasNoSignedZeros(II) ||
4940
48.8k
          
(47.0k
F47.0k
&&
4941
47.0k
           KnownSrc.isKnownNeverLogicalNegZero(F->getDenormalMode(FltSem))))
4942
39.4k
        Known.knownNot(fcNegZero);
4943
4944
48.8k
      break;
4945
48.8k
    }
4946
5.11k
    case Intrinsic::sin:
4947
10.0k
    case Intrinsic::cos: {
4948
      // Return NaN on infinite inputs.
4949
10.0k
      KnownFPClass KnownSrc;
4950
10.0k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
4951
10.0k
                          KnownSrc, Q, Depth + 1);
4952
10.0k
      Known.knownNot(fcInf);
4953
10.0k
      if (KnownSrc.isKnownNeverNaN() && 
KnownSrc.isKnownNeverInfinity()406
)
4954
0
        Known.knownNot(fcNan);
4955
10.0k
      break;
4956
5.11k
    }
4957
8.47k
    case Intrinsic::maxnum:
4958
19.7k
    case Intrinsic::minnum:
4959
19.7k
    case Intrinsic::minimum:
4960
19.7k
    case Intrinsic::maximum:
4961
19.7k
    case Intrinsic::minimumnum:
4962
19.7k
    case Intrinsic::maximumnum: {
4963
19.7k
      KnownFPClass KnownLHS, KnownRHS;
4964
19.7k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
4965
19.7k
                          KnownLHS, Q, Depth + 1);
4966
19.7k
      computeKnownFPClass(II->getArgOperand(1), DemandedElts, InterestedClasses,
4967
19.7k
                          KnownRHS, Q, Depth + 1);
4968
4969
19.7k
      bool NeverNaN = KnownLHS.isKnownNeverNaN() || 
KnownRHS.isKnownNeverNaN()17.9k
;
4970
19.7k
      Known = KnownLHS | KnownRHS;
4971
4972
      // If either operand is not NaN, the result is not NaN.
4973
19.7k
      if (NeverNaN &&
4974
19.7k
          
(7.70k
IID == Intrinsic::minnum7.70k
||
IID == Intrinsic::maxnum4.55k
||
4975
7.70k
           
IID == Intrinsic::minimumnum0
||
IID == Intrinsic::maximumnum0
))
4976
7.70k
        Known.knownNot(fcNan);
4977
4978
19.7k
      if (IID == Intrinsic::maxnum || 
IID == Intrinsic::maximumnum11.2k
) {
4979
        // If at least one operand is known to be positive, the result must be
4980
        // positive.
4981
8.47k
        if ((KnownLHS.cannotBeOrderedLessThanZero() &&
4982
8.47k
             
KnownLHS.isKnownNeverNaN()793
) ||
4983
8.47k
            
(7.78k
KnownRHS.cannotBeOrderedLessThanZero()7.78k
&&
4984
7.78k
             
KnownRHS.isKnownNeverNaN()3.77k
))
4985
4.24k
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
4986
11.2k
      } else if (IID == Intrinsic::maximum) {
4987
        // If at least one operand is known to be positive, the result must be
4988
        // positive.
4989
0
        if (KnownLHS.cannotBeOrderedLessThanZero() ||
4990
0
            KnownRHS.cannotBeOrderedLessThanZero())
4991
0
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
4992
11.2k
      } else if (IID == Intrinsic::minnum || 
IID == Intrinsic::minimumnum0
) {
4993
        // If at least one operand is known to be negative, the result must be
4994
        // negative.
4995
11.2k
        if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
4996
11.2k
             
KnownLHS.isKnownNeverNaN()1
) ||
4997
11.2k
            
(11.2k
KnownRHS.cannotBeOrderedGreaterThanZero()11.2k
&&
4998
11.2k
             
KnownRHS.isKnownNeverNaN()42
))
4999
43
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5000
11.2k
      } else 
if (0
IID == Intrinsic::minimum0
) {
5001
        // If at least one operand is known to be negative, the result must be
5002
        // negative.
5003
0
        if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
5004
0
            KnownRHS.cannotBeOrderedGreaterThanZero())
5005
0
          Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5006
0
      } else
5007
0
        llvm_unreachable("unhandled intrinsic");
5008
5009
      // Fixup zero handling if denormals could be returned as a zero.
5010
      //
5011
      // As there's no spec for denormal flushing, be conservative with the
5012
      // treatment of denormals that could be flushed to zero. For older
5013
      // subtargets on AMDGPU the min/max instructions would not flush the
5014
      // output and return the original value.
5015
      //
5016
19.7k
      if ((Known.KnownFPClasses & fcZero) != fcNone &&
5017
19.7k
          !Known.isKnownNeverSubnormal()) {
5018
18.3k
        const Function *Parent = II->getFunction();
5019
18.3k
        if (!Parent)
5020
0
          break;
5021
5022
18.3k
        DenormalMode Mode = Parent->getDenormalMode(
5023
18.3k
            II->getType()->getScalarType()->getFltSemantics());
5024
18.3k
        if (Mode != DenormalMode::getIEEE())
5025
0
          Known.KnownFPClasses |= fcZero;
5026
18.3k
      }
5027
5028
19.7k
      if (Known.isKnownNeverNaN()) {
5029
7.70k
        if (KnownLHS.SignBit && 
KnownRHS.SignBit867
&&
5030
7.70k
            
*KnownLHS.SignBit == *KnownRHS.SignBit672
) {
5031
672
          if (*KnownLHS.SignBit)
5032
0
            Known.signBitMustBeOne();
5033
672
          else
5034
672
            Known.signBitMustBeZero();
5035
7.03k
        } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
5036
7.03k
                    IID == Intrinsic::maximumnum ||
5037
7.03k
                    IID == Intrinsic::minimumnum) ||
5038
                   // FIXME: Should be using logical zero versions
5039
7.03k
                   ((KnownLHS.isKnownNeverNegZero() ||
5040
7.03k
                     
KnownRHS.isKnownNeverPosZero()6.25k
) &&
5041
7.03k
                    
(5.06k
KnownLHS.isKnownNeverPosZero()5.06k
||
5042
5.06k
                     
KnownRHS.isKnownNeverNegZero()5.05k
))) {
5043
4.90k
          if ((IID == Intrinsic::maximum || IID == Intrinsic::maximumnum ||
5044
4.90k
               IID == Intrinsic::maxnum) &&
5045
4.90k
              
(2.12k
KnownLHS.SignBit == false2.12k
||
KnownRHS.SignBit == false2.12k
))
5046
1.87k
            Known.signBitMustBeZero();
5047
3.02k
          else if ((IID == Intrinsic::minimum || IID == Intrinsic::minimumnum ||
5048
3.02k
                    IID == Intrinsic::minnum) &&
5049
3.02k
                   
(2.78k
KnownLHS.SignBit == true2.78k
||
KnownRHS.SignBit == true2.78k
))
5050
0
            Known.signBitMustBeOne();
5051
4.90k
        }
5052
7.70k
      }
5053
19.7k
      break;
5054
19.7k
    }
5055
0
    case Intrinsic::canonicalize: {
5056
0
      KnownFPClass KnownSrc;
5057
0
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5058
0
                          KnownSrc, Q, Depth + 1);
5059
5060
      // This is essentially a stronger form of
5061
      // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
5062
      // actually have an IR canonicalization guarantee.
5063
5064
      // Canonicalize may flush denormals to zero, so we have to consider the
5065
      // denormal mode to preserve known-not-0 knowledge.
5066
0
      Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
5067
5068
      // Stronger version of propagateNaN
5069
      // Canonicalize is guaranteed to quiet signaling nans.
5070
0
      if (KnownSrc.isKnownNeverNaN())
5071
0
        Known.knownNot(fcNan);
5072
0
      else
5073
0
        Known.knownNot(fcSNan);
5074
5075
0
      const Function *F = II->getFunction();
5076
0
      if (!F)
5077
0
        break;
5078
5079
      // If the parent function flushes denormals, the canonical output cannot
5080
      // be a denormal.
5081
0
      const fltSemantics &FPType =
5082
0
          II->getType()->getScalarType()->getFltSemantics();
5083
0
      DenormalMode DenormMode = F->getDenormalMode(FPType);
5084
0
      if (DenormMode == DenormalMode::getIEEE()) {
5085
0
        if (KnownSrc.isKnownNever(fcPosZero))
5086
0
          Known.knownNot(fcPosZero);
5087
0
        if (KnownSrc.isKnownNever(fcNegZero))
5088
0
          Known.knownNot(fcNegZero);
5089
0
        break;
5090
0
      }
5091
5092
0
      if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
5093
0
        Known.knownNot(fcSubnormal);
5094
5095
0
      if (DenormMode.Input == DenormalMode::PositiveZero ||
5096
0
          (DenormMode.Output == DenormalMode::PositiveZero &&
5097
0
           DenormMode.Input == DenormalMode::IEEE))
5098
0
        Known.knownNot(fcNegZero);
5099
5100
0
      break;
5101
0
    }
5102
1
    case Intrinsic::vector_reduce_fmax:
5103
1
    case Intrinsic::vector_reduce_fmin:
5104
1
    case Intrinsic::vector_reduce_fmaximum:
5105
1
    case Intrinsic::vector_reduce_fminimum: {
5106
      // reduce min/max will choose an element from one of the vector elements,
5107
      // so we can infer and class information that is common to all elements.
5108
1
      Known = computeKnownFPClass(II->getArgOperand(0), II->getFastMathFlags(),
5109
1
                                  InterestedClasses, Q, Depth + 1);
5110
      // Can only propagate sign if output is never NaN.
5111
1
      if (!Known.isKnownNeverNaN())
5112
0
        Known.SignBit.reset();
5113
1
      break;
5114
1
    }
5115
      // reverse preserves all characteristics of the input vec's element.
5116
0
    case Intrinsic::vector_reverse:
5117
0
      Known = computeKnownFPClass(
5118
0
          II->getArgOperand(0), DemandedElts.reverseBits(),
5119
0
          II->getFastMathFlags(), InterestedClasses, Q, Depth + 1);
5120
0
      break;
5121
2.41k
    case Intrinsic::trunc:
5122
49.4k
    case Intrinsic::floor:
5123
67.3k
    case Intrinsic::ceil:
5124
70.5k
    case Intrinsic::rint:
5125
70.7k
    case Intrinsic::nearbyint:
5126
86.5k
    case Intrinsic::round:
5127
86.5k
    case Intrinsic::roundeven: {
5128
86.5k
      KnownFPClass KnownSrc;
5129
86.5k
      FPClassTest InterestedSrcs = InterestedClasses;
5130
86.5k
      if (InterestedSrcs & fcPosFinite)
5131
77.6k
        InterestedSrcs |= fcPosFinite;
5132
86.5k
      if (InterestedSrcs & fcNegFinite)
5133
73.7k
        InterestedSrcs |= fcNegFinite;
5134
86.5k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5135
86.5k
                          KnownSrc, Q, Depth + 1);
5136
5137
      // Integer results cannot be subnormal.
5138
86.5k
      Known.knownNot(fcSubnormal);
5139
5140
86.5k
      Known.propagateNaN(KnownSrc, true);
5141
5142
      // Pass through infinities, except PPC_FP128 is a special case for
5143
      // intrinsics other than trunc.
5144
86.5k
      if (IID == Intrinsic::trunc || 
!V->getType()->isMultiUnitFPType()84.1k
) {
5145
86.5k
        if (KnownSrc.isKnownNeverPosInfinity())
5146
12.1k
          Known.knownNot(fcPosInf);
5147
86.5k
        if (KnownSrc.isKnownNeverNegInfinity())
5148
13.4k
          Known.knownNot(fcNegInf);
5149
86.5k
      }
5150
5151
      // Negative round ups to 0 produce -0
5152
86.5k
      if (KnownSrc.isKnownNever(fcPosFinite))
5153
972
        Known.knownNot(fcPosFinite);
5154
86.5k
      if (KnownSrc.isKnownNever(fcNegFinite))
5155
2.20k
        Known.knownNot(fcNegFinite);
5156
5157
86.5k
      break;
5158
86.5k
    }
5159
4.06k
    case Intrinsic::exp:
5160
4.42k
    case Intrinsic::exp2:
5161
4.42k
    case Intrinsic::exp10: {
5162
4.42k
      Known.knownNot(fcNegative);
5163
4.42k
      if ((InterestedClasses & fcNan) == fcNone)
5164
93
        break;
5165
5166
4.33k
      KnownFPClass KnownSrc;
5167
4.33k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5168
4.33k
                          KnownSrc, Q, Depth + 1);
5169
4.33k
      if (KnownSrc.isKnownNeverNaN()) {
5170
729
        Known.knownNot(fcNan);
5171
729
        Known.signBitMustBeZero();
5172
729
      }
5173
5174
4.33k
      break;
5175
4.42k
    }
5176
0
    case Intrinsic::fptrunc_round: {
5177
0
      computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
5178
0
                                    Q, Depth);
5179
0
      break;
5180
4.42k
    }
5181
2.90k
    case Intrinsic::log:
5182
3.52k
    case Intrinsic::log10:
5183
4.96k
    case Intrinsic::log2:
5184
4.96k
    case Intrinsic::experimental_constrained_log:
5185
4.96k
    case Intrinsic::experimental_constrained_log10:
5186
4.96k
    case Intrinsic::experimental_constrained_log2: {
5187
      // log(+inf) -> +inf
5188
      // log([+-]0.0) -> -inf
5189
      // log(-inf) -> nan
5190
      // log(-x) -> nan
5191
4.96k
      if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
5192
951
        break;
5193
5194
4.01k
      FPClassTest InterestedSrcs = InterestedClasses;
5195
4.01k
      if ((InterestedClasses & fcNegInf) != fcNone)
5196
3.95k
        InterestedSrcs |= fcZero | fcSubnormal;
5197
4.01k
      if ((InterestedClasses & fcNan) != fcNone)
5198
3.80k
        InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
5199
5200
4.01k
      KnownFPClass KnownSrc;
5201
4.01k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
5202
4.01k
                          KnownSrc, Q, Depth + 1);
5203
5204
4.01k
      if (KnownSrc.isKnownNeverPosInfinity())
5205
92
        Known.knownNot(fcPosInf);
5206
5207
4.01k
      if (KnownSrc.isKnownNeverNaN() && 
KnownSrc.cannotBeOrderedLessThanZero()1.21k
)
5208
1.19k
        Known.knownNot(fcNan);
5209
5210
4.01k
      const Function *F = II->getFunction();
5211
5212
4.01k
      if (!F)
5213
0
        break;
5214
5215
4.01k
      const fltSemantics &FltSem =
5216
4.01k
          II->getType()->getScalarType()->getFltSemantics();
5217
4.01k
      DenormalMode Mode = F->getDenormalMode(FltSem);
5218
5219
4.01k
      if (KnownSrc.isKnownNeverLogicalZero(Mode))
5220
460
        Known.knownNot(fcNegInf);
5221
5222
4.01k
      break;
5223
4.01k
    }
5224
1.02k
    case Intrinsic::powi: {
5225
1.02k
      if ((InterestedClasses & fcNegative) == fcNone)
5226
7
        break;
5227
5228
1.01k
      const Value *Exp = II->getArgOperand(1);
5229
1.01k
      Type *ExpTy = Exp->getType();
5230
1.01k
      unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
5231
1.01k
      KnownBits ExponentKnownBits(BitWidth);
5232
1.01k
      computeKnownBits(Exp, isa<VectorType>(ExpTy) ? 
DemandedElts0
: APInt(1, 1),
5233
1.01k
                       ExponentKnownBits, Q, Depth + 1);
5234
5235
1.01k
      if (ExponentKnownBits.Zero[0]) { // Is even
5236
14
        Known.knownNot(fcNegative);
5237
14
        break;
5238
14
      }
5239
5240
      // Given that exp is an integer, here are the
5241
      // ways that pow can return a negative value:
5242
      //
5243
      //   pow(-x, exp)   --> negative if exp is odd and x is negative.
5244
      //   pow(-0, exp)   --> -inf if exp is negative odd.
5245
      //   pow(-0, exp)   --> -0 if exp is positive odd.
5246
      //   pow(-inf, exp) --> -0 if exp is negative odd.
5247
      //   pow(-inf, exp) --> -inf if exp is positive odd.
5248
1.00k
      KnownFPClass KnownSrc;
5249
1.00k
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
5250
1.00k
                          KnownSrc, Q, Depth + 1);
5251
1.00k
      if (KnownSrc.isKnownNever(fcNegative))
5252
888
        Known.knownNot(fcNegative);
5253
1.00k
      break;
5254
1.01k
    }
5255
136
    case Intrinsic::ldexp: {
5256
136
      KnownFPClass KnownSrc;
5257
136
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5258
136
                          KnownSrc, Q, Depth + 1);
5259
136
      Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);
5260
5261
      // Sign is preserved, but underflows may produce zeroes.
5262
136
      if (KnownSrc.isKnownNever(fcNegative))
5263
136
        Known.knownNot(fcNegative);
5264
0
      else if (KnownSrc.cannotBeOrderedLessThanZero())
5265
0
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5266
5267
136
      if (KnownSrc.isKnownNever(fcPositive))
5268
0
        Known.knownNot(fcPositive);
5269
136
      else if (KnownSrc.cannotBeOrderedGreaterThanZero())
5270
0
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5271
5272
      // Can refine inf/zero handling based on the exponent operand.
5273
136
      const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
5274
136
      if ((InterestedClasses & ExpInfoMask) == fcNone)
5275
0
        break;
5276
136
      if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
5277
136
        break;
5278
5279
0
      const fltSemantics &Flt =
5280
0
          II->getType()->getScalarType()->getFltSemantics();
5281
0
      unsigned Precision = APFloat::semanticsPrecision(Flt);
5282
0
      const Value *ExpArg = II->getArgOperand(1);
5283
0
      ConstantRange ExpRange = computeConstantRange(
5284
0
          ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
5285
5286
0
      const int MantissaBits = Precision - 1;
5287
0
      if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
5288
0
        Known.knownNot(fcSubnormal);
5289
5290
0
      const Function *F = II->getFunction();
5291
0
      const APInt *ConstVal = ExpRange.getSingleElement();
5292
0
      const fltSemantics &FltSem =
5293
0
          II->getType()->getScalarType()->getFltSemantics();
5294
0
      if (ConstVal && ConstVal->isZero()) {
5295
        // ldexp(x, 0) -> x, so propagate everything.
5296
0
        Known.propagateCanonicalizingSrc(KnownSrc, F->getDenormalMode(FltSem));
5297
0
      } else if (ExpRange.isAllNegative()) {
5298
        // If we know the power is <= 0, can't introduce inf
5299
0
        if (KnownSrc.isKnownNeverPosInfinity())
5300
0
          Known.knownNot(fcPosInf);
5301
0
        if (KnownSrc.isKnownNeverNegInfinity())
5302
0
          Known.knownNot(fcNegInf);
5303
0
      } else if (ExpRange.isAllNonNegative()) {
5304
        // If we know the power is >= 0, can't introduce subnormal or zero
5305
0
        if (KnownSrc.isKnownNeverPosSubnormal())
5306
0
          Known.knownNot(fcPosSubnormal);
5307
0
        if (KnownSrc.isKnownNeverNegSubnormal())
5308
0
          Known.knownNot(fcNegSubnormal);
5309
0
        if (F &&
5310
0
            KnownSrc.isKnownNeverLogicalPosZero(F->getDenormalMode(FltSem)))
5311
0
          Known.knownNot(fcPosZero);
5312
0
        if (F &&
5313
0
            KnownSrc.isKnownNeverLogicalNegZero(F->getDenormalMode(FltSem)))
5314
0
          Known.knownNot(fcNegZero);
5315
0
      }
5316
5317
0
      break;
5318
136
    }
5319
0
    case Intrinsic::arithmetic_fence: {
5320
0
      computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedClasses,
5321
0
                          Known, Q, Depth + 1);
5322
0
      break;
5323
136
    }
5324
0
    case Intrinsic::experimental_constrained_sitofp:
5325
0
    case Intrinsic::experimental_constrained_uitofp:
5326
      // Cannot produce nan
5327
0
      Known.knownNot(fcNan);
5328
5329
      // sitofp and uitofp turn into +0.0 for zero.
5330
0
      Known.knownNot(fcNegZero);
5331
5332
      // Integers cannot be subnormal
5333
0
      Known.knownNot(fcSubnormal);
5334
5335
0
      if (IID == Intrinsic::experimental_constrained_uitofp)
5336
0
        Known.signBitMustBeZero();
5337
5338
      // TODO: Copy inf handling from instructions
5339
0
      break;
5340
648k
    default:
5341
648k
      break;
5342
2.70M
    }
5343
5344
2.70M
    break;
5345
2.70M
  }
5346
2.70M
  case Instruction::FAdd:
5347
2.21M
  case Instruction::FSub: {
5348
2.21M
    KnownFPClass KnownLHS, KnownRHS;
5349
2.21M
    bool WantNegative =
5350
2.21M
        Op->getOpcode() == Instruction::FAdd &&
5351
2.21M
        
(InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone1.31M
;
5352
2.21M
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
5353
2.21M
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
5354
5355
2.21M
    if (!WantNaN && 
!WantNegative587k
&&
!WantNegZero378k
)
5356
74.4k
      break;
5357
5358
2.14M
    FPClassTest InterestedSrcs = InterestedClasses;
5359
2.14M
    if (WantNegative)
5360
1.09M
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
5361
2.14M
    if (InterestedClasses & fcNan)
5362
1.62M
      InterestedSrcs |= fcInf;
5363
2.14M
    computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
5364
2.14M
                        KnownRHS, Q, Depth + 1);
5365
5366
2.14M
    if ((WantNaN && 
KnownRHS.isKnownNeverNaN()1.62M
) ||
5367
2.14M
        
(1.77M
WantNegative1.77M
&&
KnownRHS.cannotBeOrderedLessThanZero()801k
) ||
5368
2.14M
        
WantNegZero1.65M
||
Opc == Instruction::FSub137k
) {
5369
5370
      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
5371
      // there's no point.
5372
2.03M
      computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
5373
2.03M
                          KnownLHS, Q, Depth + 1);
5374
      // Adding positive and negative infinity produces NaN.
5375
      // TODO: Check sign of infinities.
5376
2.03M
      if (KnownLHS.isKnownNeverNaN() && 
KnownRHS.isKnownNeverNaN()269k
&&
5377
2.03M
          
(160k
KnownLHS.isKnownNeverInfinity()160k
||
KnownRHS.isKnownNeverInfinity()38.7k
))
5378
146k
        Known.knownNot(fcNan);
5379
5380
      // FIXME: Context function should always be passed in separately
5381
2.03M
      const Function *F = cast<Instruction>(Op)->getFunction();
5382
5383
2.03M
      if (Op->getOpcode() == Instruction::FAdd) {
5384
1.17M
        if (KnownLHS.cannotBeOrderedLessThanZero() &&
5385
1.17M
            
KnownRHS.cannotBeOrderedLessThanZero()86.5k
)
5386
55.2k
          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5387
1.17M
        if (!F)
5388
0
          break;
5389
5390
1.17M
        const fltSemantics &FltSem =
5391
1.17M
            Op->getType()->getScalarType()->getFltSemantics();
5392
1.17M
        DenormalMode Mode = F->getDenormalMode(FltSem);
5393
5394
        // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
5395
1.17M
        if ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
5396
1.17M
             
KnownRHS.isKnownNeverLogicalNegZero(Mode)1.02M
) &&
5397
            // Make sure output negative denormal can't flush to -0
5398
1.17M
            
outputDenormalIsIEEEOrPosZero(*F, Op->getType())495k
)
5399
495k
          Known.knownNot(fcNegZero);
5400
1.17M
      } else {
5401
854k
        if (!F)
5402
0
          break;
5403
5404
854k
        const fltSemantics &FltSem =
5405
854k
            Op->getType()->getScalarType()->getFltSemantics();
5406
854k
        DenormalMode Mode = F->getDenormalMode(FltSem);
5407
5408
        // Only fsub -0, +0 can return -0
5409
854k
        if ((KnownLHS.isKnownNeverLogicalNegZero(Mode) ||
5410
854k
             
KnownRHS.isKnownNeverLogicalPosZero(Mode)691k
) &&
5411
            // Make sure output negative denormal can't flush to -0
5412
854k
            
outputDenormalIsIEEEOrPosZero(*F, Op->getType())170k
)
5413
170k
          Known.knownNot(fcNegZero);
5414
854k
      }
5415
2.03M
    }
5416
5417
2.14M
    break;
5418
2.14M
  }
5419
2.14M
  case Instruction::FMul: {
5420
    // X * X is always non-negative or a NaN.
5421
1.72M
    if (Op->getOperand(0) == Op->getOperand(1))
5422
165k
      Known.knownNot(fcNegative);
5423
5424
1.72M
    if ((InterestedClasses & fcNan) != fcNan)
5425
536k
      break;
5426
5427
    // fcSubnormal is only needed in case of DAZ.
5428
1.18M
    const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
5429
5430
1.18M
    KnownFPClass KnownLHS, KnownRHS;
5431
1.18M
    computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
5432
1.18M
                        Q, Depth + 1);
5433
1.18M
    if (!KnownRHS.isKnownNeverNaN())
5434
701k
      break;
5435
5436
486k
    computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
5437
486k
                        Q, Depth + 1);
5438
486k
    if (!KnownLHS.isKnownNeverNaN())
5439
406k
      break;
5440
5441
79.9k
    if (KnownLHS.SignBit && 
KnownRHS.SignBit45.4k
) {
5442
45.1k
      if (*KnownLHS.SignBit == *KnownRHS.SignBit)
5443
44.9k
        Known.signBitMustBeZero();
5444
157
      else
5445
157
        Known.signBitMustBeOne();
5446
45.1k
    }
5447
5448
    // If 0 * +/-inf produces NaN.
5449
79.9k
    if (KnownLHS.isKnownNeverInfinity() && 
KnownRHS.isKnownNeverInfinity()63.9k
) {
5450
63.8k
      Known.knownNot(fcNan);
5451
63.8k
      break;
5452
63.8k
    }
5453
5454
16.1k
    const Function *F = cast<Instruction>(Op)->getFunction();
5455
16.1k
    if (!F)
5456
0
      break;
5457
5458
16.1k
    Type *OpTy = Op->getType()->getScalarType();
5459
16.1k
    const fltSemantics &FltSem = OpTy->getFltSemantics();
5460
16.1k
    DenormalMode Mode = F->getDenormalMode(FltSem);
5461
5462
16.1k
    if ((KnownRHS.isKnownNeverInfinity() ||
5463
16.1k
         
KnownLHS.isKnownNeverLogicalZero(Mode)6.50k
) &&
5464
16.1k
        
(9.82k
KnownLHS.isKnownNeverInfinity()9.82k
||
5465
9.82k
         
KnownRHS.isKnownNeverLogicalZero(Mode)9.80k
))
5466
7.21k
      Known.knownNot(fcNan);
5467
5468
16.1k
    break;
5469
16.1k
  }
5470
567k
  case Instruction::FDiv:
5471
571k
  case Instruction::FRem: {
5472
571k
    if (Op->getOperand(0) == Op->getOperand(1)) {
5473
      // TODO: Could filter out snan if we inspect the operand
5474
111
      if (Op->getOpcode() == Instruction::FDiv) {
5475
        // X / X is always exactly 1.0 or a NaN.
5476
111
        Known.KnownFPClasses = fcNan | fcPosNormal;
5477
111
      } else {
5478
        // X % X is always exactly [+-]0.0 or a NaN.
5479
0
        Known.KnownFPClasses = fcNan | fcZero;
5480
0
      }
5481
5482
111
      break;
5483
111
    }
5484
5485
571k
    const bool WantNan = (InterestedClasses & fcNan) != fcNone;
5486
571k
    const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
5487
571k
    const bool WantPositive =
5488
571k
        Opc == Instruction::FRem && 
(InterestedClasses & fcPositive) != fcNone3.77k
;
5489
571k
    if (!WantNan && 
!WantNegative141k
&&
!WantPositive19.6k
)
5490
19.5k
      break;
5491
5492
551k
    KnownFPClass KnownLHS, KnownRHS;
5493
5494
551k
    computeKnownFPClass(Op->getOperand(1), DemandedElts,
5495
551k
                        fcNan | fcInf | fcZero | fcNegative, KnownRHS, Q,
5496
551k
                        Depth + 1);
5497
5498
551k
    bool KnowSomethingUseful =
5499
551k
        KnownRHS.isKnownNeverNaN() || 
KnownRHS.isKnownNever(fcNegative)406k
;
5500
5501
551k
    if (KnowSomethingUseful || 
WantPositive378k
) {
5502
173k
      const FPClassTest InterestedLHS =
5503
173k
          WantPositive ? 
fcAllFlags3.60k
5504
173k
                       : 
fcNan | fcInf | fcZero | fcSubnormal | fcNegative170k
;
5505
5506
173k
      computeKnownFPClass(Op->getOperand(0), DemandedElts,
5507
173k
                          InterestedClasses & InterestedLHS, KnownLHS, Q,
5508
173k
                          Depth + 1);
5509
173k
    }
5510
5511
551k
    const Function *F = cast<Instruction>(Op)->getFunction();
5512
551k
    const fltSemantics &FltSem =
5513
551k
        Op->getType()->getScalarType()->getFltSemantics();
5514
5515
551k
    if (Op->getOpcode() == Instruction::FDiv) {
5516
      // Only 0/0, Inf/Inf produce NaN.
5517
548k
      if (KnownLHS.isKnownNeverNaN() && 
KnownRHS.isKnownNeverNaN()49.4k
&&
5518
548k
          
(41.0k
KnownLHS.isKnownNeverInfinity()41.0k
||
5519
41.0k
           
KnownRHS.isKnownNeverInfinity()16.5k
) &&
5520
548k
          
(38.3k
(38.3k
F38.3k
&&
5521
38.3k
            KnownLHS.isKnownNeverLogicalZero(F->getDenormalMode(FltSem))) ||
5522
38.3k
           
(32.5k
F32.5k
&&
5523
32.5k
            KnownRHS.isKnownNeverLogicalZero(F->getDenormalMode(FltSem))))) {
5524
19.2k
        Known.knownNot(fcNan);
5525
19.2k
      }
5526
5527
      // X / -0.0 is -Inf (or NaN).
5528
      // +X / +X is +X
5529
548k
      if (KnownLHS.isKnownNever(fcNegative) && 
KnownRHS.isKnownNever(fcNegative)34.0k
)
5530
29.1k
        Known.knownNot(fcNegative);
5531
548k
    } else {
5532
      // Inf REM x and x REM 0 produce NaN.
5533
3.76k
      if (KnownLHS.isKnownNeverNaN() && 
KnownRHS.isKnownNeverNaN()185
&&
5534
3.76k
          
KnownLHS.isKnownNeverInfinity()176
&&
F161
&&
5535
3.76k
          
KnownRHS.isKnownNeverLogicalZero(F->getDenormalMode(FltSem))161
) {
5536
161
        Known.knownNot(fcNan);
5537
161
      }
5538
5539
      // The sign for frem is the same as the first operand.
5540
3.76k
      if (KnownLHS.cannotBeOrderedLessThanZero())
5541
0
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
5542
3.76k
      if (KnownLHS.cannotBeOrderedGreaterThanZero())
5543
0
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
5544
5545
      // See if we can be more aggressive about the sign of 0.
5546
3.76k
      if (KnownLHS.isKnownNever(fcNegative))
5547
0
        Known.knownNot(fcNegative);
5548
3.76k
      if (KnownLHS.isKnownNever(fcPositive))
5549
0
        Known.knownNot(fcPositive);
5550
3.76k
    }
5551
5552
551k
    break;
5553
571k
  }
5554
90.6k
  case Instruction::FPExt: {
5555
    // Infinity, nan and zero propagate from source.
5556
90.6k
    computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
5557
90.6k
                        Known, Q, Depth + 1);
5558
5559
90.6k
    const fltSemantics &DstTy =
5560
90.6k
        Op->getType()->getScalarType()->getFltSemantics();
5561
90.6k
    const fltSemantics &SrcTy =
5562
90.6k
        Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
5563
5564
    // All subnormal inputs should be in the normal range in the result type.
5565
90.6k
    if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy)) {
5566
90.6k
      if (Known.KnownFPClasses & fcPosSubnormal)
5567
88.2k
        Known.KnownFPClasses |= fcPosNormal;
5568
90.6k
      if (Known.KnownFPClasses & fcNegSubnormal)
5569
84.6k
        Known.KnownFPClasses |= fcNegNormal;
5570
90.6k
      Known.knownNot(fcSubnormal);
5571
90.6k
    }
5572
5573
    // Sign bit of a nan isn't guaranteed.
5574
90.6k
    if (!Known.isKnownNeverNaN())
5575
88.0k
      Known.SignBit = std::nullopt;
5576
90.6k
    break;
5577
571k
  }
5578
246k
  case Instruction::FPTrunc: {
5579
246k
    computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known, Q,
5580
246k
                                  Depth);
5581
246k
    break;
5582
571k
  }
5583
270k
  case Instruction::SIToFP:
5584
469k
  case Instruction::UIToFP: {
5585
    // Cannot produce nan
5586
469k
    Known.knownNot(fcNan);
5587
5588
    // Integers cannot be subnormal
5589
469k
    Known.knownNot(fcSubnormal);
5590
5591
    // sitofp and uitofp turn into +0.0 for zero.
5592
469k
    Known.knownNot(fcNegZero);
5593
469k
    if (Op->getOpcode() == Instruction::UIToFP)
5594
198k
      Known.signBitMustBeZero();
5595
5596
469k
    if (InterestedClasses & fcInf) {
5597
      // Get width of largest magnitude integer (remove a bit if signed).
5598
      // This still works for a signed minimum value because the largest FP
5599
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
5600
373k
      int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
5601
373k
      if (Op->getOpcode() == Instruction::SIToFP)
5602
199k
        --IntSize;
5603
5604
      // If the exponent of the largest finite FP value can hold the largest
5605
      // integer, the result of the cast must be finite.
5606
373k
      Type *FPTy = Op->getType()->getScalarType();
5607
373k
      if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
5608
373k
        Known.knownNot(fcInf);
5609
373k
    }
5610
5611
469k
    break;
5612
270k
  }
5613
221k
  case Instruction::ExtractElement: {
5614
    // Look through extract element. If the index is non-constant or
5615
    // out-of-range demand all elements, otherwise just the extracted element.
5616
221k
    const Value *Vec = Op->getOperand(0);
5617
5618
221k
    APInt DemandedVecElts;
5619
221k
    if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
5620
221k
      unsigned NumElts = VecTy->getNumElements();
5621
221k
      DemandedVecElts = APInt::getAllOnes(NumElts);
5622
221k
      auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(1));
5623
221k
      if (CIdx && CIdx->getValue().ult(NumElts))
5624
221k
        DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
5625
221k
    } else {
5626
0
      DemandedVecElts = APInt(1, 1);
5627
0
    }
5628
5629
221k
    return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
5630
221k
                               Q, Depth + 1);
5631
270k
  }
5632
140k
  case Instruction::InsertElement: {
5633
140k
    if (isa<ScalableVectorType>(Op->getType()))
5634
0
      return;
5635
5636
140k
    const Value *Vec = Op->getOperand(0);
5637
140k
    const Value *Elt = Op->getOperand(1);
5638
140k
    auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
5639
140k
    unsigned NumElts = DemandedElts.getBitWidth();
5640
140k
    APInt DemandedVecElts = DemandedElts;
5641
140k
    bool NeedsElt = true;
5642
    // If we know the index we are inserting to, clear it from Vec check.
5643
140k
    if (CIdx && CIdx->getValue().ult(NumElts)) {
5644
140k
      DemandedVecElts.clearBit(CIdx->getZExtValue());
5645
140k
      NeedsElt = DemandedElts[CIdx->getZExtValue()];
5646
140k
    }
5647
5648
    // Do we demand the inserted element?
5649
140k
    if (NeedsElt) {
5650
124k
      computeKnownFPClass(Elt, Known, InterestedClasses, Q, Depth + 1);
5651
      // If we don't know any bits, early out.
5652
124k
      if (Known.isUnknown())
5653
114k
        break;
5654
124k
    } else {
5655
16.2k
      Known.KnownFPClasses = fcNone;
5656
16.2k
    }
5657
5658
    // Do we need anymore elements from Vec?
5659
26.6k
    if (!DemandedVecElts.isZero()) {
5660
19.1k
      KnownFPClass Known2;
5661
19.1k
      computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, Q,
5662
19.1k
                          Depth + 1);
5663
19.1k
      Known |= Known2;
5664
19.1k
    }
5665
5666
26.6k
    break;
5667
140k
  }
5668
130k
  case Instruction::ShuffleVector: {
5669
    // For undef elements, we don't know anything about the common state of
5670
    // the shuffle result.
5671
130k
    APInt DemandedLHS, DemandedRHS;
5672
130k
    auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
5673
130k
    if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
5674
217
      return;
5675
5676
130k
    if (!!DemandedLHS) {
5677
129k
      const Value *LHS = Shuf->getOperand(0);
5678
129k
      computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, Q,
5679
129k
                          Depth + 1);
5680
5681
      // If we don't know any bits, early out.
5682
129k
      if (Known.isUnknown())
5683
117k
        break;
5684
129k
    } else {
5685
551
      Known.KnownFPClasses = fcNone;
5686
551
    }
5687
5688
12.5k
    if (!!DemandedRHS) {
5689
2.55k
      KnownFPClass Known2;
5690
2.55k
      const Value *RHS = Shuf->getOperand(1);
5691
2.55k
      computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, Q,
5692
2.55k
                          Depth + 1);
5693
2.55k
      Known |= Known2;
5694
2.55k
    }
5695
5696
12.5k
    break;
5697
130k
  }
5698
44.4k
  case Instruction::ExtractValue: {
5699
44.4k
    const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
5700
44.4k
    ArrayRef<unsigned> Indices = Extract->getIndices();
5701
44.4k
    const Value *Src = Extract->getAggregateOperand();
5702
44.4k
    if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
5703
44.4k
        Indices[0] == 0) {
5704
18.1k
      if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
5705
439
        switch (II->getIntrinsicID()) {
5706
431
        case Intrinsic::frexp: {
5707
431
          Known.knownNot(fcSubnormal);
5708
5709
431
          KnownFPClass KnownSrc;
5710
431
          computeKnownFPClass(II->getArgOperand(0), DemandedElts,
5711
431
                              InterestedClasses, KnownSrc, Q, Depth + 1);
5712
5713
431
          const Function *F = cast<Instruction>(Op)->getFunction();
5714
431
          const fltSemantics &FltSem =
5715
431
              Op->getType()->getScalarType()->getFltSemantics();
5716
5717
431
          if (KnownSrc.isKnownNever(fcNegative))
5718
0
            Known.knownNot(fcNegative);
5719
431
          else {
5720
431
            if (F &&
5721
431
                KnownSrc.isKnownNeverLogicalNegZero(F->getDenormalMode(FltSem)))
5722
0
              Known.knownNot(fcNegZero);
5723
431
            if (KnownSrc.isKnownNever(fcNegInf))
5724
184
              Known.knownNot(fcNegInf);
5725
431
          }
5726
5727
431
          if (KnownSrc.isKnownNever(fcPositive))
5728
0
            Known.knownNot(fcPositive);
5729
431
          else {
5730
431
            if (F &&
5731
431
                KnownSrc.isKnownNeverLogicalPosZero(F->getDenormalMode(FltSem)))
5732
0
              Known.knownNot(fcPosZero);
5733
431
            if (KnownSrc.isKnownNever(fcPosInf))
5734
184
              Known.knownNot(fcPosInf);
5735
431
          }
5736
5737
431
          Known.propagateNaN(KnownSrc);
5738
431
          return;
5739
0
        }
5740
8
        default:
5741
8
          break;
5742
439
        }
5743
439
      }
5744
18.1k
    }
5745
5746
44.0k
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Q,
5747
44.0k
                        Depth + 1);
5748
44.0k
    break;
5749
44.4k
  }
5750
1.41M
  case Instruction::PHI: {
5751
1.41M
    const PHINode *P = cast<PHINode>(Op);
5752
    // Unreachable blocks may have zero-operand PHI nodes.
5753
1.41M
    if (P->getNumIncomingValues() == 0)
5754
0
      break;
5755
5756
    // Otherwise take the unions of the known bit sets of the operands,
5757
    // taking conservative care to avoid excessive recursion.
5758
1.41M
    const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
5759
5760
1.41M
    if (Depth < PhiRecursionLimit) {
5761
      // Skip if every incoming value references to ourself.
5762
933k
      if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
5763
0
        break;
5764
5765
933k
      bool First = true;
5766
5767
1.30M
      for (const Use &U : P->operands()) {
5768
1.30M
        Value *IncValue;
5769
1.30M
        Instruction *CxtI;
5770
1.30M
        breakSelfRecursivePHI(&U, P, IncValue, CxtI);
5771
        // Skip direct self references.
5772
1.30M
        if (IncValue == P)
5773
289
          continue;
5774
5775
1.30M
        KnownFPClass KnownSrc;
5776
        // Recurse, but cap the recursion to two levels, because we don't want
5777
        // to waste time spinning around in loops. We need at least depth 2 to
5778
        // detect known sign bits.
5779
1.30M
        computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
5780
1.30M
                            Q.getWithoutCondContext().getWithInstruction(CxtI),
5781
1.30M
                            PhiRecursionLimit);
5782
5783
1.30M
        if (First) {
5784
933k
          Known = KnownSrc;
5785
933k
          First = false;
5786
933k
        } else {
5787
367k
          Known |= KnownSrc;
5788
367k
        }
5789
5790
1.30M
        if (Known.KnownFPClasses == fcAllFlags)
5791
842k
          break;
5792
1.30M
      }
5793
933k
    }
5794
5795
1.41M
    break;
5796
1.41M
  }
5797
1.41M
  case Instruction::BitCast: {
5798
302k
    const Value *Src;
5799
302k
    if (!match(Op, m_ElementWiseBitCast(m_Value(Src))) ||
5800
302k
        
!Src->getType()->isIntOrIntVectorTy()294k
)
5801
8.16k
      break;
5802
5803
294k
    const Type *Ty = Op->getType()->getScalarType();
5804
294k
    KnownBits Bits(Ty->getScalarSizeInBits());
5805
294k
    computeKnownBits(Src, DemandedElts, Bits, Q, Depth + 1);
5806
5807
    // Transfer information from the sign bit.
5808
294k
    if (Bits.isNonNegative())
5809
34.8k
      Known.signBitMustBeZero();
5810
259k
    else if (Bits.isNegative())
5811
102
      Known.signBitMustBeOne();
5812
5813
294k
    if (Ty->isIEEELikeFPTy()) {
5814
      // IEEE floats are NaN when all bits of the exponent plus at least one of
5815
      // the fraction bits are 1. This means:
5816
      //   - If we assume unknown bits are 0 and the value is NaN, it will
5817
      //     always be NaN
5818
      //   - If we assume unknown bits are 1 and the value is not NaN, it can
5819
      //     never be NaN
5820
      // Note: They do not hold for x86_fp80 format.
5821
294k
      if (APFloat(Ty->getFltSemantics(), Bits.One).isNaN())
5822
24
        Known.KnownFPClasses = fcNan;
5823
294k
      else if (!APFloat(Ty->getFltSemantics(), ~Bits.Zero).isNaN())
5824
8.40k
        Known.knownNot(fcNan);
5825
5826
      // Build KnownBits representing Inf and check if it must be equal or
5827
      // unequal to this value.
5828
294k
      auto InfKB = KnownBits::makeConstant(
5829
294k
          APFloat::getInf(Ty->getFltSemantics()).bitcastToAPInt());
5830
294k
      InfKB.Zero.clearSignBit();
5831
294k
      if (const auto InfResult = KnownBits::eq(Bits, InfKB)) {
5832
4.64k
        assert(!InfResult.value());
5833
4.64k
        Known.knownNot(fcInf);
5834
290k
      } else if (Bits == InfKB) {
5835
0
        Known.KnownFPClasses = fcInf;
5836
0
      }
5837
5838
      // Build KnownBits representing Zero and check if it must be equal or
5839
      // unequal to this value.
5840
294k
      auto ZeroKB = KnownBits::makeConstant(
5841
294k
          APFloat::getZero(Ty->getFltSemantics()).bitcastToAPInt());
5842
294k
      ZeroKB.Zero.clearSignBit();
5843
294k
      if (const auto ZeroResult = KnownBits::eq(Bits, ZeroKB)) {
5844
1.97k
        assert(!ZeroResult.value());
5845
1.97k
        Known.knownNot(fcZero);
5846
292k
      } else if (Bits == ZeroKB) {
5847
0
        Known.KnownFPClasses = fcZero;
5848
0
      }
5849
294k
    }
5850
5851
294k
    break;
5852
302k
  }
5853
5.72M
  default:
5854
5.72M
    break;
5855
18.3M
  }
5856
18.3M
}
5857
5858
KnownFPClass llvm::computeKnownFPClass(const Value *V,
5859
                                       const APInt &DemandedElts,
5860
                                       FPClassTest InterestedClasses,
5861
                                       const SimplifyQuery &SQ,
5862
6.77M
                                       unsigned Depth) {
5863
6.77M
  KnownFPClass KnownClasses;
5864
6.77M
  ::computeKnownFPClass(V, DemandedElts, InterestedClasses, KnownClasses, SQ,
5865
6.77M
                        Depth);
5866
6.77M
  return KnownClasses;
5867
6.77M
}
5868
5869
KnownFPClass llvm::computeKnownFPClass(const Value *V,
5870
                                       FPClassTest InterestedClasses,
5871
                                       const SimplifyQuery &SQ,
5872
4.01M
                                       unsigned Depth) {
5873
4.01M
  KnownFPClass Known;
5874
4.01M
  ::computeKnownFPClass(V, Known, InterestedClasses, SQ, Depth);
5875
4.01M
  return Known;
5876
4.01M
}
5877
5878
KnownFPClass llvm::computeKnownFPClass(
5879
    const Value *V, const DataLayout &DL, FPClassTest InterestedClasses,
5880
    const TargetLibraryInfo *TLI, AssumptionCache *AC, const Instruction *CxtI,
5881
0
    const DominatorTree *DT, bool UseInstrInfo, unsigned Depth) {
5882
0
  return computeKnownFPClass(V, InterestedClasses,
5883
0
                             SimplifyQuery(DL, TLI, DT, AC, CxtI, UseInstrInfo),
5884
0
                             Depth);
5885
0
}
5886
5887
KnownFPClass
5888
llvm::computeKnownFPClass(const Value *V, const APInt &DemandedElts,
5889
                          FastMathFlags FMF, FPClassTest InterestedClasses,
5890
2.66M
                          const SimplifyQuery &SQ, unsigned Depth) {
5891
2.66M
  if (FMF.noNaNs())
5892
49.2k
    InterestedClasses &= ~fcNan;
5893
2.66M
  if (FMF.noInfs())
5894
49.2k
    InterestedClasses &= ~fcInf;
5895
5896
2.66M
  KnownFPClass Result =
5897
2.66M
      computeKnownFPClass(V, DemandedElts, InterestedClasses, SQ, Depth);
5898
5899
2.66M
  if (FMF.noNaNs())
5900
49.2k
    Result.KnownFPClasses &= ~fcNan;
5901
2.66M
  if (FMF.noInfs())
5902
49.2k
    Result.KnownFPClasses &= ~fcInf;
5903
2.66M
  return Result;
5904
2.66M
}
5905
5906
KnownFPClass llvm::computeKnownFPClass(const Value *V, FastMathFlags FMF,
5907
                                       FPClassTest InterestedClasses,
5908
                                       const SimplifyQuery &SQ,
5909
2.66M
                                       unsigned Depth) {
5910
2.66M
  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
5911
2.66M
  APInt DemandedElts =
5912
2.66M
      FVTy ? 
APInt::getAllOnes(FVTy->getNumElements())83.3k
:
APInt(1, 1)2.57M
;
5913
2.66M
  return computeKnownFPClass(V, DemandedElts, FMF, InterestedClasses, SQ,
5914
2.66M
                             Depth);
5915
2.66M
}
5916
5917
bool llvm::cannotBeNegativeZero(const Value *V, const SimplifyQuery &SQ,
5918
1.32M
                                unsigned Depth) {
5919
1.32M
  KnownFPClass Known = computeKnownFPClass(V, fcNegZero, SQ, Depth);
5920
1.32M
  return Known.isKnownNeverNegZero();
5921
1.32M
}
5922
5923
bool llvm::cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ,
5924
11.5k
                                       unsigned Depth) {
5925
11.5k
  KnownFPClass Known =
5926
11.5k
      computeKnownFPClass(V, KnownFPClass::OrderedLessThanZeroMask, SQ, Depth);
5927
11.5k
  return Known.cannotBeOrderedLessThanZero();
5928
11.5k
}
5929
5930
bool llvm::isKnownNeverInfinity(const Value *V, const SimplifyQuery &SQ,
5931
8.71k
                                unsigned Depth) {
5932
8.71k
  KnownFPClass Known = computeKnownFPClass(V, fcInf, SQ, Depth);
5933
8.71k
  return Known.isKnownNeverInfinity();
5934
8.71k
}
5935
5936
/// Return true if the floating-point value can never contain a NaN or infinity.
5937
bool llvm::isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ,
5938
0
                                unsigned Depth) {
5939
0
  KnownFPClass Known = computeKnownFPClass(V, fcInf | fcNan, SQ, Depth);
5940
0
  return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity();
5941
0
}
5942
5943
/// Return true if the floating-point scalar value is not a NaN or if the
5944
/// floating-point vector value has no NaN elements. Return false if a value
5945
/// could ever be NaN.
5946
bool llvm::isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ,
5947
189k
                           unsigned Depth) {
5948
189k
  KnownFPClass Known = computeKnownFPClass(V, fcNan, SQ, Depth);
5949
189k
  return Known.isKnownNeverNaN();
5950
189k
}
5951
5952
/// Return false if we can prove that the specified FP value's sign bit is 0.
5953
/// Return true if we can prove that the specified FP value's sign bit is 1.
5954
/// Otherwise return std::nullopt.
5955
std::optional<bool> llvm::computeKnownFPSignBit(const Value *V,
5956
                                                const SimplifyQuery &SQ,
5957
1.14M
                                                unsigned Depth) {
5958
1.14M
  KnownFPClass Known = computeKnownFPClass(V, fcAllFlags, SQ, Depth);
5959
1.14M
  return Known.SignBit;
5960
1.14M
}
5961
5962
8.95k
bool llvm::canIgnoreSignBitOfZero(const Use &U) {
5963
8.95k
  auto *User = cast<Instruction>(U.getUser());
5964
8.95k
  if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
5965
8.12k
    if (FPOp->hasNoSignedZeros())
5966
2
      return true;
5967
8.12k
  }
5968
5969
8.95k
  switch (User->getOpcode()) {
5970
2
  case Instruction::FPToSI:
5971
3
  case Instruction::FPToUI:
5972
3
    return true;
5973
352
  case Instruction::FCmp:
5974
    // fcmp treats both positive and negative zero as equal.
5975
352
    return true;
5976
2.39k
  case Instruction::Call:
5977
2.39k
    if (auto *II = dyn_cast<IntrinsicInst>(User)) {
5978
1.27k
      switch (II->getIntrinsicID()) {
5979
0
      case Intrinsic::fabs:
5980
0
        return true;
5981
0
      case Intrinsic::copysign:
5982
0
        return U.getOperandNo() == 0;
5983
0
      case Intrinsic::is_fpclass:
5984
0
      case Intrinsic::vp_is_fpclass: {
5985
0
        auto Test =
5986
0
            static_cast<FPClassTest>(
5987
0
                cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()) &
5988
0
            FPClassTest::fcZero;
5989
0
        return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
5990
0
      }
5991
1.27k
      default:
5992
1.27k
        return false;
5993
1.27k
      }
5994
1.27k
    }
5995
1.12k
    return false;
5996
6.19k
  default:
5997
6.19k
    return false;
5998
8.95k
  }
5999
8.95k
}
6000
6001
593
bool llvm::canIgnoreSignBitOfNaN(const Use &U) {
6002
593
  auto *User = cast<Instruction>(U.getUser());
6003
593
  if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
6004
526
    if (FPOp->hasNoNaNs())
6005
0
      return true;
6006
526
  }
6007
6008
593
  switch (User->getOpcode()) {
6009
2
  case Instruction::FPToSI:
6010
3
  case Instruction::FPToUI:
6011
3
    return true;
6012
  // Proper FP math operations ignore the sign bit of NaN.
6013
22
  case Instruction::FAdd:
6014
27
  case Instruction::FSub:
6015
30
  case Instruction::FMul:
6016
36
  case Instruction::FDiv:
6017
36
  case Instruction::FRem:
6018
36
  case Instruction::FPTrunc:
6019
49
  case Instruction::FPExt:
6020
426
  case Instruction::FCmp:
6021
426
    return true;
6022
  // Bitwise FP operations should preserve the sign bit of NaN.
6023
0
  case Instruction::FNeg:
6024
0
  case Instruction::Select:
6025
61
  case Instruction::PHI:
6026
61
    return false;
6027
4
  case Instruction::Ret:
6028
4
    return User->getFunction()->getAttributes().getRetNoFPClass() &
6029
4
           FPClassTest::fcNan;
6030
39
  case Instruction::Call:
6031
39
  case Instruction::Invoke: {
6032
39
    if (auto *II = dyn_cast<IntrinsicInst>(User)) {
6033
14
      switch (II->getIntrinsicID()) {
6034
0
      case Intrinsic::fabs:
6035
0
        return true;
6036
0
      case Intrinsic::copysign:
6037
0
        return U.getOperandNo() == 0;
6038
      // Other proper FP math intrinsics ignore the sign bit of NaN.
6039
0
      case Intrinsic::maxnum:
6040
0
      case Intrinsic::minnum:
6041
0
      case Intrinsic::maximum:
6042
0
      case Intrinsic::minimum:
6043
0
      case Intrinsic::maximumnum:
6044
0
      case Intrinsic::minimumnum:
6045
0
      case Intrinsic::canonicalize:
6046
0
      case Intrinsic::fma:
6047
1
      case Intrinsic::fmuladd:
6048
3
      case Intrinsic::sqrt:
6049
3
      case Intrinsic::pow:
6050
3
      case Intrinsic::powi:
6051
3
      case Intrinsic::fptoui_sat:
6052
3
      case Intrinsic::fptosi_sat:
6053
3
      case Intrinsic::is_fpclass:
6054
3
      case Intrinsic::vp_is_fpclass:
6055
3
        return true;
6056
11
      default:
6057
11
        return false;
6058
14
      }
6059
14
    }
6060
6061
25
    FPClassTest NoFPClass =
6062
25
        cast<CallBase>(User)->getParamNoFPClass(U.getOperandNo());
6063
25
    return NoFPClass & FPClassTest::fcNan;
6064
39
  }
6065
60
  default:
6066
60
    return false;
6067
593
  }
6068
593
}
6069
6070
54.3M
Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
6071
6072
  // All byte-wide stores are splatable, even of arbitrary variables.
6073
54.3M
  if (V->getType()->isIntegerTy(8))
6074
8.79M
    return V;
6075
6076
45.5M
  LLVMContext &Ctx = V->getContext();
6077
6078
  // Undef don't care.
6079
45.5M
  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
6080
45.5M
  if (isa<UndefValue>(V))
6081
4.07k
    return UndefInt8;
6082
6083
  // Return poison for zero-sized type.
6084
45.5M
  if (DL.getTypeStoreSize(V->getType()).isZero())
6085
0
    return PoisonValue::get(Type::getInt8Ty(Ctx));
6086
6087
45.5M
  Constant *C = dyn_cast<Constant>(V);
6088
45.5M
  if (!C) {
6089
    // Conceptually, we could handle things like:
6090
    //   %a = zext i8 %X to i16
6091
    //   %b = shl i16 %a, 8
6092
    //   %c = or i16 %a, %b
6093
    // but until there is an example that actually needs this, it doesn't seem
6094
    // worth worrying about.
6095
24.3M
    return nullptr;
6096
24.3M
  }
6097
6098
  // Handle 'null' ConstantArrayZero etc.
6099
21.2M
  if (C->isNullValue())
6100
9.27M
    return Constant::getNullValue(Type::getInt8Ty(Ctx));
6101
6102
  // Constant floating-point values can be handled as integer values if the
6103
  // corresponding integer value is "byteable".  An important case is 0.0.
6104
11.9M
  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
6105
369k
    Type *Ty = nullptr;
6106
369k
    if (CFP->getType()->isHalfTy())
6107
12
      Ty = Type::getInt16Ty(Ctx);
6108
369k
    else if (CFP->getType()->isFloatTy())
6109
218k
      Ty = Type::getInt32Ty(Ctx);
6110
151k
    else if (CFP->getType()->isDoubleTy())
6111
146k
      Ty = Type::getInt64Ty(Ctx);
6112
    // Don't handle long double formats, which have strange constraints.
6113
369k
    return Ty ? 
isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)365k
6114
369k
              : 
nullptr4.33k
;
6115
369k
  }
6116
6117
  // We can handle constant integers that are multiple of 8 bits.
6118
11.5M
  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
6119
6.82M
    if (CI->getBitWidth() % 8 == 0) {
6120
6.81M
      assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
6121
6.81M
      if (!CI->getValue().isSplat(8))
6122
6.43M
        return nullptr;
6123
380k
      return ConstantInt::get(Ctx, CI->getValue().trunc(8));
6124
6.81M
    }
6125
6.82M
  }
6126
6127
4.74M
  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
6128
2.39M
    if (CE->getOpcode() == Instruction::IntToPtr) {
6129
682k
      if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
6130
682k
        unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
6131
682k
        if (Constant *Op = ConstantFoldIntegerCast(
6132
682k
                CE->getOperand(0), Type::getIntNTy(Ctx, BitWidth), false, DL))
6133
682k
          return isBytewiseValue(Op, DL);
6134
682k
      }
6135
682k
    }
6136
2.39M
  }
6137
6138
4.06M
  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
6139
827k
    if (LHS == RHS)
6140
185k
      return LHS;
6141
642k
    if (!LHS || !RHS)
6142
86.2k
      return nullptr;
6143
555k
    if (LHS == UndefInt8)
6144
284k
      return RHS;
6145
270k
    if (RHS == UndefInt8)
6146
1.69k
      return LHS;
6147
269k
    return nullptr;
6148
270k
  };
6149
6150
4.06M
  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
6151
288k
    Value *Val = UndefInt8;
6152
745k
    for (uint64_t I = 0, E = CA->getNumElements(); I != E; 
++I456k
)
6153
743k
      if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
6154
286k
        return nullptr;
6155
2.32k
    return Val;
6156
288k
  }
6157
6158
3.77M
  if (isa<ConstantAggregate>(C)) {
6159
70.3k
    Value *Val = UndefInt8;
6160
70.3k
    for (Value *Op : C->operands())
6161
84.4k
      if (!(Val = Merge(Val, isBytewiseValue(Op, DL))))
6162
69.2k
        return nullptr;
6163
1.16k
    return Val;
6164
70.3k
  }
6165
6166
  // Don't try to handle the handful of other constants.
6167
3.70M
  return nullptr;
6168
3.77M
}
6169
6170
// This is the recursive version of BuildSubAggregate. It takes a few different
6171
// arguments. Idxs is the index within the nested struct From that we are
6172
// looking at now (which is of type IndexedType). IdxSkip is the number of
6173
// indices from Idxs that should be left out when inserting into the resulting
6174
// struct. To is the result struct built so far, new insertvalue instructions
6175
// build on that.
6176
// This is the recursive worker for BuildSubAggregate. Idxs is the index path
// within the nested struct From that we are looking at now (which is of type
// IndexedType). IdxSkip is the number of leading indices from Idxs that should
// be dropped when inserting into the resulting struct. To is the result
// aggregate built so far; new insertvalue instructions build on it.
// Returns the (possibly extended) chain of insertvalues, or nullptr if some
// element could not be located; in the failure case any insertvalues created
// along the way for this subtree are erased again.
static Value *BuildSubAggregate(Value *From, Value *To, Type *IndexedType,
                                SmallVectorImpl<unsigned> &Idxs,
                                unsigned IdxSkip,
                                BasicBlock::iterator InsertBefore) {
  StructType *STy = dyn_cast<StructType>(IndexedType);
  if (STy) {
    // Save the original To argument so we can modify it
    Value *OrigTo = To;
    // General case, the type indexed by Idxs is a struct
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      // Process each struct element recursively
      Idxs.push_back(i);
      Value *PrevTo = To;
      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
                             InsertBefore);
      Idxs.pop_back();
      if (!To) {
        // Couldn't find any inserted value for this index? Cleanup: walk the
        // insertvalue chain created since OrigTo backwards and erase each
        // instruction, since nothing references them yet.
        while (PrevTo != OrigTo) {
          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
          PrevTo = Del->getAggregateOperand();
          Del->eraseFromParent();
        }
        // Stop processing elements
        break;
      }
    }
    // If we successfully found a value for each of our subaggregates
    if (To)
      return To;
  }
  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
  // the struct's elements had a value that was inserted directly. In the latter
  // case, perhaps we can't determine each of the subelements individually, but
  // we might be able to find the complete struct somewhere.

  // Find the value that is at that particular spot
  Value *V = FindInsertedValue(From, Idxs);

  if (!V)
    return nullptr;

  // Insert the value in the new (sub) aggregate, skipping the IdxSkip leading
  // indices so the insert is relative to the extracted sub-struct.
  return InsertValueInst::Create(To, V, ArrayRef(Idxs).slice(IdxSkip), "tmp",
                                 InsertBefore);
}
6222
6223
// This helper takes a nested struct and extracts a part of it (which is again a
6224
// struct) into a new value. For example, given the struct:
6225
// { a, { b, { c, d }, e } }
6226
// and the indices "1, 1" this returns
6227
// { c, d }.
6228
//
6229
// It does this by inserting an insertvalue for each element in the resulting
6230
// struct, as opposed to just inserting a single struct. This will only work if
6231
// each of the elements of the substruct are known (ie, inserted into From by an
6232
// insertvalue instruction somewhere).
6233
//
6234
// All inserted insertvalue instructions are inserted before InsertBefore
6235
// This helper takes a nested struct and extracts a part of it (which is again a
// struct) into a new value. For example, given the struct:
// { a, { b, { c, d }, e } }
// and the indices "1, 1" this returns
// { c, d }.
//
// Rather than inserting a single struct value, it emits one insertvalue per
// leaf element of the resulting struct, which only succeeds when each element
// of the substruct is individually recoverable (i.e. was inserted into From by
// some insertvalue instruction).
//
// All newly created insertvalue instructions are placed before InsertBefore.
static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
                                BasicBlock::iterator InsertBefore) {
  // The type of the sub-aggregate selected by idx_range.
  Type *SubTy = ExtractValueInst::getIndexedType(From->getType(), idx_range);
  // Seed the rebuild with poison; every element gets filled in (or we fail).
  Value *Seed = PoisonValue::get(SubTy);
  // Recursion extends this path; indices before SkipCount are stripped when
  // inserting into the result.
  SmallVector<unsigned, 10> Path(idx_range);
  unsigned SkipCount = Path.size();
  return BuildSubAggregate(From, Seed, SubTy, Path, SkipCount, InsertBefore);
}
6245
6246
/// Given an aggregate and a sequence of indices, see if the scalar value
6247
/// indexed is already around as a register, for example if it was inserted
6248
/// directly into the aggregate.
6249
///
6250
/// If InsertBefore is not null, this function will duplicate (modified)
6251
/// insertvalues when a part of a nested struct is extracted.
6252
Value *
llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
                        std::optional<BasicBlock::iterator> InsertBefore) {
  // Nothing to index? Just return V then (this is useful at the end of our
  // recursion).
  if (idx_range.empty())
    return V;
  // We have indices, so V should have an indexable type.
  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
         "Not looking at a struct or array?");
  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
         "Invalid indices for type?");

  // Constants can be indexed directly, one level per recursion step.
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = C->getAggregateElement(idx_range[0]);
    if (!C) return nullptr;
    return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
  }

  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
    // Loop the indices for the insertvalue instruction in parallel with the
    // requested indices
    const unsigned *req_idx = idx_range.begin();
    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
         i != e; ++i, ++req_idx) {
      if (req_idx == idx_range.end()) {
        // The requested path is a strict prefix of the insertvalue's path.
        // We can't handle this without inserting insertvalues
        if (!InsertBefore)
          return nullptr;

        // The requested index identifies a part of a nested aggregate. Handle
        // this specially. For example,
        // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
        // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
        // %C = extractvalue {i32, { i32, i32 } } %B, 1
        // This can be changed into
        // %A = insertvalue {i32, i32 } undef, i32 10, 0
        // %C = insertvalue {i32, i32 } %A, i32 11, 1
        // which allows the unused 0,0 element from the nested struct to be
        // removed.
        return BuildSubAggregate(V, ArrayRef(idx_range.begin(), req_idx),
                                 *InsertBefore);
      }

      // This insert value inserts something else than what we are looking for.
      // See if the (aggregate) value inserted into has the value we are
      // looking for, then.
      if (*req_idx != *i)
        return FindInsertedValue(I->getAggregateOperand(), idx_range,
                                 InsertBefore);
    }
    // If we end up here, the indices of the insertvalue match with those
    // requested (though possibly only partially). Now we recursively look at
    // the inserted value, passing any remaining indices.
    return FindInsertedValue(I->getInsertedValueOperand(),
                             ArrayRef(req_idx, idx_range.end()), InsertBefore);
  }

  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.

    // Calculate the number of indices required
    unsigned size = I->getNumIndices() + idx_range.size();
    // Allocate some space to put the new indices in
    SmallVector<unsigned, 5> Idxs;
    Idxs.reserve(size);
    // Add indices from the extract value instruction
    Idxs.append(I->idx_begin(), I->idx_end());

    // Add requested indices
    Idxs.append(idx_range.begin(), idx_range.end());

    assert(Idxs.size() == size
           && "Number of indices added not correct?");

    return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
  }
  // Otherwise, we don't know (such as, extracting from a function return value
  // or load instruction)
  return nullptr;
}
6335
6336
bool llvm::isGEPBasedOnPointerToString(const GEPOperator *GEP,
6337
36.7k
                                       unsigned CharSize) {
6338
  // Make sure the GEP has exactly three arguments.
6339
36.7k
  if (GEP->getNumOperands() != 3)
6340
36.0k
    return false;
6341
6342
  // Make sure the index-ee is a pointer to array of \p CharSize integers.
6343
  // CharSize.
6344
690
  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
6345
690
  if (!AT || 
!AT->getElementType()->isIntegerTy(CharSize)634
)
6346
614
    return false;
6347
6348
  // Check to make sure that the first operand of the GEP is an integer and
6349
  // has value 0 so that we are sure we're indexing into the initializer.
6350
76
  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
6351
76
  if (!FirstIdx || !FirstIdx->isZero())
6352
0
    return false;
6353
6354
76
  return true;
6355
76
}
6356
6357
// If V refers to an initialized global constant, set Slice either to
6358
// its initializer if the size of its elements equals ElementSize, or,
6359
// for ElementSize == 8, to its representation as an array of unsiged
6360
// char. Return true on success.
6361
// Offset is in the unit "nr of ElementSize sized elements".
6362
bool llvm::getConstantDataArrayInfo(const Value *V,
                                    ConstantDataArraySlice &Slice,
                                    unsigned ElementSize, uint64_t Offset) {
  assert(V && "V should not be null.");
  assert((ElementSize % 8) == 0 &&
         "ElementSize expected to be a multiple of the size of a byte.");
  unsigned ElementSizeInBytes = ElementSize / 8;

  // Drill down into the pointer expression V, ignoring any intervening
  // casts, and determine the identity of the object it references along
  // with the cumulative byte offset into it.
  const GlobalVariable *GV =
    dyn_cast<GlobalVariable>(getUnderlyingObject(V));
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    // Fail if V is not based on constant global object.
    return false;

  const DataLayout &DL = GV->getDataLayout();
  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);

  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
                                                 /*AllowNonInbounds*/ true))
    // Fail if a constant offset could not be determined.
    return false;

  uint64_t StartIdx = Off.getLimitedValue();
  if (StartIdx == UINT64_MAX)
    // Fail if the constant offset is excessive (saturated by getLimitedValue).
    return false;

  // Off/StartIdx is in the unit of bytes. So we need to convert to number of
  // elements. Simply bail out if that isn't possible.
  if ((StartIdx % ElementSizeInBytes) != 0)
    return false;

  // From here on Offset counts ElementSize-sized elements, combining the
  // caller-provided element offset with the pointer's byte offset.
  Offset += StartIdx / ElementSizeInBytes;
  ConstantDataArray *Array = nullptr;
  ArrayType *ArrayTy = nullptr;

  if (GV->getInitializer()->isNullValue()) {
    // All-zero initializer: report a null Array with just a length, which
    // callers treat as "all elements are zero".
    Type *GVTy = GV->getValueType();
    uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedValue();
    uint64_t Length = SizeInBytes / ElementSizeInBytes;

    Slice.Array = nullptr;
    Slice.Offset = 0;
    // Return an empty Slice for undersized constants to let callers
    // transform even undefined library calls into simpler, well-defined
    // expressions.  This is preferable to making the calls although it
    // prevents sanitizers from detecting such calls.
    Slice.Length = Length < Offset ? 0 : Length - Offset;
    return true;
  }

  auto *Init = const_cast<Constant *>(GV->getInitializer());
  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
    Type *InitElTy = ArrayInit->getElementType();
    if (InitElTy->isIntegerTy(ElementSize)) {
      // If Init is an initializer for an array of the expected type
      // and size, use it as is.
      Array = ArrayInit;
      ArrayTy = ArrayInit->getType();
    }
  }

  if (!Array) {
    if (ElementSize != 8)
      // TODO: Handle conversions to larger integral types.
      return false;

    // Otherwise extract the portion of the initializer starting
    // at Offset as an array of bytes, and reset Offset.
    Init = ReadByteArrayFromGlobal(GV, Offset);
    if (!Init)
      return false;

    Offset = 0;
    Array = dyn_cast<ConstantDataArray>(Init);
    ArrayTy = dyn_cast<ArrayType>(Init->getType());
  }

  uint64_t NumElts = ArrayTy->getArrayNumElements();
  if (Offset > NumElts)
    return false;

  Slice.Array = Array;
  Slice.Offset = Offset;
  Slice.Length = NumElts - Offset;
  return true;
}
6452
6453
/// Extract bytes from the initializer of the constant array V, which need
6454
/// not be a nul-terminated string.  On success, store the bytes in Str and
6455
/// return true.  When TrimAtNul is set, Str will contain only the bytes up
6456
/// to but not including the first nul.  Return false on failure.
6457
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
6458
3.09M
                                 bool TrimAtNul) {
6459
3.09M
  ConstantDataArraySlice Slice;
6460
3.09M
  if (!getConstantDataArrayInfo(V, Slice, 8))
6461
2.06M
    return false;
6462
6463
1.02M
  if (Slice.Array == nullptr) {
6464
227
    if (TrimAtNul) {
6465
      // Return a nul-terminated string even for an empty Slice.  This is
6466
      // safe because all existing SimplifyLibcalls callers require string
6467
      // arguments and the behavior of the functions they fold is undefined
6468
      // otherwise.  Folding the calls this way is preferable to making
6469
      // the undefined library calls, even though it prevents sanitizers
6470
      // from reporting such calls.
6471
196
      Str = StringRef();
6472
196
      return true;
6473
196
    }
6474
31
    if (Slice.Length == 1) {
6475
5
      Str = StringRef("", 1);
6476
5
      return true;
6477
5
    }
6478
    // We cannot instantiate a StringRef as we do not have an appropriate string
6479
    // of 0s at hand.
6480
26
    return false;
6481
31
  }
6482
6483
  // Start out with the entire array in the StringRef.
6484
1.02M
  Str = Slice.Array->getAsString();
6485
  // Skip over 'offset' bytes.
6486
1.02M
  Str = Str.substr(Slice.Offset);
6487
6488
1.02M
  if (TrimAtNul) {
6489
    // Trim off the \0 and anything after it.  If the array is not nul
6490
    // terminated, we just return the whole end of string.  The client may know
6491
    // some other way that the string is length-bound.
6492
957k
    Str = Str.substr(0, Str.find('\0'));
6493
957k
  }
6494
1.02M
  return true;
6495
1.02M
}
6496
6497
// These next two are very similar to the above, but also look through PHI
6498
// nodes.
6499
// TODO: See if we can integrate these two together.
6500
6501
/// If we can compute the length of the string pointed to by
6502
/// the specified pointer, return 'len+1'.  If we can't, return 0.
6503
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'.  If we can't, return 0.
/// ~0ULL is used internally as "no constraint yet / dead cycle" and is
/// resolved by the callers (GetStringLength maps it to 1).
static uint64_t GetStringLengthH(const Value *V,
                                 SmallPtrSetImpl<const PHINode*> &PHIs,
                                 unsigned CharSize) {
  // Look through noop bitcast instructions.
  V = V->stripPointerCasts();

  // If this is a PHI node, there are two cases: either we have already seen it
  // or we haven't.
  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
    if (!PHIs.insert(PN).second)
      return ~0ULL;  // already in the set: cycle, imposes no constraint.

    // If it was new, see if all the input strings are the same length.
    uint64_t LenSoFar = ~0ULL;
    for (Value *IncValue : PN->incoming_values()) {
      uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
      if (Len == 0) return 0; // Unknown length -> unknown.

      // Cyclic incoming value: ignore, it constrains nothing.
      if (Len == ~0ULL) continue;

      if (Len != LenSoFar && LenSoFar != ~0ULL)
        return 0;    // Disagree -> unknown.
      LenSoFar = Len;
    }

    // Success, all agree.
    return LenSoFar;
  }

  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
    if (Len1 == 0) return 0;
    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
    if (Len2 == 0) return 0;
    // A ~0ULL arm came from a dead phi cycle; take the other arm's length.
    if (Len1 == ~0ULL) return Len2;
    if (Len2 == ~0ULL) return Len1;
    if (Len1 != Len2) return 0;
    return Len1;
  }

  // Otherwise, see if we can read the string.
  ConstantDataArraySlice Slice;
  if (!getConstantDataArrayInfo(V, Slice, CharSize))
    return 0;

  if (Slice.Array == nullptr)
    // Zeroinitializer (including an empty one).
    return 1;

  // Search for the first nul character.  Return a conservative result even
  // when there is no nul.  This is safe since otherwise the string function
  // being folded such as strlen is undefined, and can be preferable to
  // making the undefined library call.
  unsigned NullIndex = 0;
  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
    if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
      break;
  }

  return NullIndex + 1;
}
6565
6566
/// If we can compute the length of the string pointed to by
6567
/// the specified pointer, return 'len+1'.  If we can't, return 0.
6568
2.25M
uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
6569
2.25M
  if (!V->getType()->isPointerTy())
6570
0
    return 0;
6571
6572
2.25M
  SmallPtrSet<const PHINode*, 32> PHIs;
6573
2.25M
  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
6574
  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
6575
  // an empty string as a length.
6576
2.25M
  return Len == ~0ULL ? 
10
: Len;
6577
2.25M
}
6578
6579
const Value *
llvm::getArgumentAliasingToReturnedPointer(const CallBase *Call,
                                           bool MustPreserveNullness) {
  assert(Call &&
         "getArgumentAliasingToReturnedPointer only works on nonnull calls");
  // A "returned" argument attribute is an explicit aliasing guarantee.
  const Value *ReturnedArg = Call->getReturnedArgOperand();
  if (ReturnedArg)
    return ReturnedArg;
  // Otherwise fall back to the intrinsic allowlist. This can be used only as
  // an aliasing property.
  if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
          Call, MustPreserveNullness))
    return Call->getArgOperand(0);
  return nullptr;
}
6592
6593
bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
6594
736M
    const CallBase *Call, bool MustPreserveNullness) {
6595
736M
  switch (Call->getIntrinsicID()) {
6596
0
  case Intrinsic::launder_invariant_group:
6597
0
  case Intrinsic::strip_invariant_group:
6598
0
  case Intrinsic::aarch64_irg:
6599
0
  case Intrinsic::aarch64_tagp:
6600
  // The amdgcn_make_buffer_rsrc function does not alter the address of the
6601
  // input pointer (and thus preserve null-ness for the purposes of escape
6602
  // analysis, which is where the MustPreserveNullness flag comes in to play).
6603
  // However, it will not necessarily map ptr addrspace(N) null to ptr
6604
  // addrspace(8) null, aka the "null descriptor", which has "all loads return
6605
  // 0, all stores are dropped" semantics. Given the context of this intrinsic
6606
  // list, no one should be relying on such a strict interpretation of
6607
  // MustPreserveNullness (and, at time of writing, they are not), but we
6608
  // document this fact out of an abundance of caution.
6609
0
  case Intrinsic::amdgcn_make_buffer_rsrc:
6610
0
    return true;
6611
6.77k
  case Intrinsic::ptrmask:
6612
6.77k
    return !MustPreserveNullness;
6613
2.49M
  case Intrinsic::threadlocal_address:
6614
    // The underlying variable changes with thread ID. The Thread ID may change
6615
    // at coroutine suspend points.
6616
2.49M
    return !Call->getParent()->getParent()->isPresplitCoroutine();
6617
733M
  default:
6618
733M
    return false;
6619
736M
  }
6620
736M
}
6621
6622
/// \p PN defines a loop-variant pointer to an object.  Check if the
6623
/// previous iteration of the loop was referring to the same object as \p PN.
6624
static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
6625
558k
                                         const LoopInfo *LI) {
6626
  // Find the loop-defined value.
6627
558k
  Loop *L = LI->getLoopFor(PN->getParent());
6628
558k
  if (PN->getNumIncomingValues() != 2)
6629
0
    return true;
6630
6631
  // Find the value from previous iteration.
6632
558k
  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
6633
558k
  if (!PrevValue || 
LI->getLoopFor(PrevValue->getParent()) != L519k
)
6634
185k
    PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
6635
558k
  if (!PrevValue || 
LI->getLoopFor(PrevValue->getParent()) != L558k
)
6636
1.75k
    return true;
6637
6638
  // If a new pointer is loaded in the loop, the pointer references a different
6639
  // object in every iteration.  E.g.:
6640
  //    for (i)
6641
  //       int *p = a[i];
6642
  //       ...
6643
556k
  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
6644
4.58k
    if (!L->isLoopInvariant(Load->getPointerOperand()))
6645
1.74k
      return false;
6646
555k
  return true;
6647
556k
}
6648
6649
8.00G
// Strip GEPs, no-op casts, non-interposable aliases, LCSSA phis and
// returned-argument calls from V to find the underlying object it points at.
// MaxLookup == 0 means "no limit".
const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
      const Value *PtrOp = GEP->getPointerOperand();
      if (!PtrOp->getType()->isPointerTy()) // Only handle scalar pointer base.
        return V;
      V = PtrOp;
    } else if (Operator::getOpcode(V) == Instruction::BitCast ||
               Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
      Value *NewV = cast<Operator>(V)->getOperand(0);
      if (!NewV->getType()->isPointerTy())
        return V;
      V = NewV;
    } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
      // Interposable aliases may be replaced at link time; don't look through.
      if (GA->isInterposable())
        return V;
      V = GA->getAliasee();
    } else {
      if (auto *PHI = dyn_cast<PHINode>(V)) {
        // Look through single-arg phi nodes created by LCSSA.
        if (PHI->getNumIncomingValues() == 1) {
          V = PHI->getIncomingValue(0);
          continue;
        }
      } else if (auto *Call = dyn_cast<CallBase>(V)) {
        // CaptureTracking can know about special capturing properties of some
        // intrinsics like launder.invariant.group, that can't be expressed with
        // the attributes, but have properties like returning aliasing pointer.
        // Because some analysis may assume that nocaptured pointer is not
        // returned from some special intrinsic (because function would have to
        // be marked with returns attribute), it is crucial to use this function
        // because it should be in sync with CaptureTracking. Not using it may
        // cause weird miscompilations where 2 aliasing pointers are assumed to
        // noalias.
        if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
          V = RP;
          continue;
        }
      }

      // Not a construct we can look through: V is the underlying object.
      return V;
    }
    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
  }
  // Lookup budget exhausted; return the best stripping achieved so far.
  return V;
}
6695
6696
// Collect into Objects all underlying objects V may point to, additionally
// looking through selects and phis (getUnderlyingObject stops at those).
// When LI is provided, loop-carried phis whose underlying object changes each
// iteration are NOT looked through (see comment below).
void llvm::getUnderlyingObjects(const Value *V,
                                SmallVectorImpl<const Value *> &Objects,
                                const LoopInfo *LI, unsigned MaxLookup) {
  SmallPtrSet<const Value *, 4> Visited;
  SmallVector<const Value *, 4> Worklist;
  Worklist.push_back(V);
  do {
    const Value *P = Worklist.pop_back_val();
    P = getUnderlyingObject(P, MaxLookup);

    // Skip values already processed (diamonds in the use graph).
    if (!Visited.insert(P).second)
      continue;

    if (auto *SI = dyn_cast<SelectInst>(P)) {
      // Either arm may be the object; explore both.
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (auto *PN = dyn_cast<PHINode>(P)) {
      // If this PHI changes the underlying object in every iteration of the
      // loop, don't look through it.  Consider:
      //   int **A;
      //   for (i) {
      //     Prev = Curr;     // Prev = PHI (Prev_0, Curr)
      //     Curr = A[i];
      //     *Prev, *Curr;
      //
      // Prev is tracking Curr one iteration behind so they refer to different
      // underlying objects.
      if (!LI || !LI->isLoopHeader(PN->getParent()) ||
          isSameUnderlyingObjectInLoop(PN, LI))
        append_range(Worklist, PN->incoming_values());
      else
        Objects.push_back(P);
      continue;
    }

    Objects.push_back(P);
  } while (!Worklist.empty());
}
6737
6738
86.3M
// Like getUnderlyingObjects, but returns a single object: the common
// underlying object of all paths through phis/selects, if one exists within a
// small exploration budget; otherwise falls back to getUnderlyingObject(V).
const Value *llvm::getUnderlyingObjectAggressive(const Value *V) {
  const unsigned MaxVisited = 8;

  SmallPtrSet<const Value *, 8> Visited;
  SmallVector<const Value *, 8> Worklist;
  Worklist.push_back(V);
  const Value *Object = nullptr;
  // Used as fallback if we can't find a common underlying object through
  // recursion.
  bool First = true;
  const Value *FirstObject = getUnderlyingObject(V);
  do {
    const Value *P = Worklist.pop_back_val();
    // The first pop reuses FirstObject to avoid recomputing it.
    P = First ? FirstObject : getUnderlyingObject(P);
    First = false;

    if (!Visited.insert(P).second)
      continue;

    // Exploration budget exhausted; give up and use the direct answer.
    if (Visited.size() == MaxVisited)
      return FirstObject;

    if (auto *SI = dyn_cast<SelectInst>(P)) {
      Worklist.push_back(SI->getTrueValue());
      Worklist.push_back(SI->getFalseValue());
      continue;
    }

    if (auto *PN = dyn_cast<PHINode>(P)) {
      append_range(Worklist, PN->incoming_values());
      continue;
    }

    // Leaf object: require all leaves to agree, else fall back.
    if (!Object)
      Object = P;
    else if (Object != P)
      return FirstObject;
  } while (!Worklist.empty());

  return Object ? Object : FirstObject;
}
6779
6780
/// This is the function that does the work of looking through basic
6781
/// ptrtoint+arithmetic+inttoptr sequences.
6782
0
/// This is the function that does the work of looking through basic
/// ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  while (true) {
    const Operator *Op = dyn_cast<Operator>(V);
    if (!Op)
      return V;

    // If we find a ptrtoint, we can transfer control back to the
    // regular getUnderlyingObject machinery.
    if (Op->getOpcode() == Instruction::PtrToInt)
      return Op->getOperand(0);

    // Only adds are worth looking through; anything else terminates the walk.
    if (Op->getOpcode() != Instruction::Add)
      return V;

    // If we find an add of a constant, a multiplied value, or a phi, it's
    // likely that the other operand will lead us to the base
    // object. We don't have to worry about the case where the
    // object address is somehow being computed by the multiply,
    // because our callers only care when the result is an
    // identifiable object.
    const Value *Addend = Op->getOperand(1);
    if (!isa<ConstantInt>(Addend) &&
        Operator::getOpcode(Addend) != Instruction::Mul &&
        !isa<PHINode>(Addend))
      return V;

    V = Op->getOperand(0);
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  }
}
6807
6808
/// This is a wrapper around getUnderlyingObjects and adds support for basic
6809
/// ptrtoint+arithmetic+inttoptr sequences.
6810
/// It returns false if unidentified object is found in getUnderlyingObjects.
6811
/// This is a wrapper around getUnderlyingObjects and adds support for basic
/// ptrtoint+arithmetic+inttoptr sequences.
/// It returns false if unidentified object is found in getUnderlyingObjects,
/// and in that case Objects is left empty.
bool llvm::getUnderlyingObjectsForCodeGen(const Value *V,
                                          SmallVectorImpl<Value *> &Objects) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 4> Working(1, V);
  do {
    V = Working.pop_back_val();

    SmallVector<const Value *, 4> Objs;
    getUnderlyingObjects(V, Objs);

    for (const Value *V : Objs) {
      if (!Visited.insert(V).second)
        continue;
      // Look through inttoptr by chasing its integer operand back to a
      // ptrtoint'ed pointer, then continue the pointer-level walk on it.
      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
        const Value *O =
          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
        if (O->getType()->isPointerTy()) {
          Working.push_back(O);
          continue;
        }
      }
      // If getUnderlyingObjects fails to find an identifiable object,
      // getUnderlyingObjectsForCodeGen also fails for safety.
      if (!isIdentifiedObject(V)) {
        Objects.clear();
        return false;
      }
      Objects.push_back(const_cast<Value *>(V));
    }
  } while (!Working.empty());
  return true;
}
6843
6844
0
// Walk V's defining expressions (casts, phis, selects, GEPs, returned-arg
// calls) looking for a unique AllocaInst they all derive from. Returns
// nullptr if more than one alloca is reachable or an unhandled construct is
// hit. With OffsetZero set, only all-zero-index GEPs are looked through, so
// the result provably points at the alloca's start.
AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
  AllocaInst *Result = nullptr;
  SmallPtrSet<Value *, 4> Visited;
  SmallVector<Value *, 4> Worklist;

  auto AddWork = [&](Value *V) {
    if (Visited.insert(V).second)
      Worklist.push_back(V);
  };

  AddWork(V);
  do {
    V = Worklist.pop_back_val();
    assert(Visited.count(V));

    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      // Two distinct allocas reachable -> no unique answer.
      if (Result && Result != AI)
        return nullptr;
      Result = AI;
    } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
      AddWork(CI->getOperand(0));
    } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
      for (Value *IncValue : PN->incoming_values())
        AddWork(IncValue);
    } else if (auto *SI = dyn_cast<SelectInst>(V)) {
      AddWork(SI->getTrueValue());
      AddWork(SI->getFalseValue());
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
      // A GEP with non-zero indices would move the pointer off the alloca's
      // start, which the OffsetZero contract forbids.
      if (OffsetZero && !GEP->hasAllZeroIndices())
        return nullptr;
      AddWork(GEP->getPointerOperand());
    } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
      // Only calls that provably return one of their arguments can be
      // looked through.
      Value *Returned = CB->getReturnedArgOperand();
      if (Returned)
        AddWork(Returned);
      else
        return nullptr;
    } else {
      // Unknown producer: cannot prove the value comes from an alloca.
      return nullptr;
    }
  } while (!Worklist.empty());

  return Result;
}
6888
6889
// Returns true if every user of \p V is an intrinsic permitted by the
// AllowLifetime / AllowDroppable flags; any non-intrinsic user (or a
// disallowed intrinsic) makes the result false.
static bool onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
    const Value *V, bool AllowLifetime, bool AllowDroppable) {
  for (const User *U : V->users()) {
    // Non-intrinsic users disqualify immediately.
    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return false;

    if (AllowLifetime && II->isLifetimeStartOrEnd())
      continue;

    if (AllowDroppable && II->isDroppable())
      continue;

    return false;
  }
  // Note: a value with no users vacuously satisfies the predicate.
  return true;
}
6906
6907
0
bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
6908
0
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6909
0
      V, /* AllowLifetime */ true, /* AllowDroppable */ false);
6910
0
}
6911
106
bool llvm::onlyUsedByLifetimeMarkersOrDroppableInsts(const Value *V) {
6912
106
  return onlyUsedByLifetimeMarkersOrDroppableInstsHelper(
6913
106
      V, /* AllowLifetime */ true, /* AllowDroppable */ true);
6914
106
}
6915
6916
13.0M
// Returns true if \p I operates lane-wise on vectors (no data flows between
// lanes). Intrinsics are classified via isTriviallyVectorizable; shuffles are
// only allowed if they are lane-preserving selects; calls, bitcasts and
// extractelement are conservatively treated as cross-lane.
bool llvm::isNotCrossLaneOperation(const Instruction *I) {
  if (auto *II = dyn_cast<IntrinsicInst>(I))
    return isTriviallyVectorizable(II->getIntrinsicID());
  auto *Shuffle = dyn_cast<ShuffleVectorInst>(I);
  return (!Shuffle || Shuffle->isSelect()) &&
         !isa<CallBase, BitCastInst, ExtractElementInst>(I);
}
6923
6924
bool llvm::isSafeToSpeculativelyExecute(
6925
    const Instruction *Inst, const Instruction *CtxI, AssumptionCache *AC,
6926
    const DominatorTree *DT, const TargetLibraryInfo *TLI, bool UseVariableInfo,
6927
988M
    bool IgnoreUBImplyingAttrs) {
6928
988M
  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
6929
988M
                                                AC, DT, TLI, UseVariableInfo,
6930
988M
                                                IgnoreUBImplyingAttrs);
6931
988M
}
6932
6933
// Decides whether an instruction with the given (possibly overridden) opcode
// can be executed speculatively, i.e. hoisted to a point where its original
// control-flow guards no longer hold, without introducing UB or side effects.
bool llvm::isSafeToSpeculativelyExecuteWithOpcode(
    unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
    AssumptionCache *AC, const DominatorTree *DT, const TargetLibraryInfo *TLI,
    bool UseVariableInfo, bool IgnoreUBImplyingAttrs) {
#ifndef NDEBUG
  if (Inst->getOpcode() != Opcode) {
    // Check that the operands are actually compatible with the Opcode override.
    auto hasEqualReturnAndLeadingOperandTypes =
        [](const Instruction *Inst, unsigned NumLeadingOperands) {
          if (Inst->getNumOperands() < NumLeadingOperands)
            return false;
          const Type *ExpectedType = Inst->getType();
          for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
            if (Inst->getOperand(ItOp)->getType() != ExpectedType)
              return false;
          return true;
        };
    assert(!Instruction::isBinaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 2));
    assert(!Instruction::isUnaryOp(Opcode) ||
           hasEqualReturnAndLeadingOperandTypes(Inst, 1));
  }
#endif

  switch (Opcode) {
  // Opcodes not listed below have no trapping/UB behavior of their own.
  default:
    return true;
  case Instruction::UDiv:
  case Instruction::URem: {
    // x / y is undefined if y == 0.
    const APInt *V;
    if (match(Inst->getOperand(1), m_APInt(V)))
      return *V != 0;
    // Non-constant divisor: cannot prove it is non-zero.
    return false;
  }
  case Instruction::SDiv:
  case Instruction::SRem: {
    // x / y is undefined if y == 0 or x == INT_MIN and y == -1
    const APInt *Numerator, *Denominator;
    if (!match(Inst->getOperand(1), m_APInt(Denominator)))
      return false;
    // We cannot hoist this division if the denominator is 0.
    if (*Denominator == 0)
      return false;
    // It's safe to hoist if the denominator is not 0 or -1.
    if (!Denominator->isAllOnes())
      return true;
    // At this point we know that the denominator is -1.  It is safe to hoist as
    // long we know that the numerator is not INT_MIN.
    if (match(Inst->getOperand(0), m_APInt(Numerator)))
      return !Numerator->isMinSignedValue();
    // The numerator *might* be MinSignedValue.
    return false;
  }
  case Instruction::Load: {
    // Loads are only speculatable when the pointer is known dereferenceable
    // and suitably aligned at the speculation point.
    if (!UseVariableInfo)
      return false;

    const LoadInst *LI = dyn_cast<LoadInst>(Inst);
    if (!LI)
      return false;
    if (mustSuppressSpeculation(*LI))
      return false;
    const DataLayout &DL = LI->getDataLayout();
    return isDereferenceableAndAlignedPointer(LI->getPointerOperand(),
                                              LI->getType(), LI->getAlign(), DL,
                                              CtxI, AC, DT, TLI);
  }
  case Instruction::Call: {
    auto *CI = dyn_cast<const CallInst>(Inst);
    if (!CI)
      return false;
    const Function *Callee = CI->getCalledFunction();

    // The called function could have undefined behavior or side-effects, even
    // if marked readnone nounwind.
    if (!Callee || !Callee->isSpeculatable())
      return false;
    // Since the operands may be changed after hoisting, undefined behavior may
    // be triggered by some UB-implying attributes.
    return IgnoreUBImplyingAttrs || !CI->hasUBImplyingAttrs();
  }
  case Instruction::VAArg:
  case Instruction::Alloca:
  case Instruction::Invoke:
  case Instruction::CallBr:
  case Instruction::PHI:
  case Instruction::Store:
  case Instruction::Ret:
  case Instruction::Br:
  case Instruction::IndirectBr:
  case Instruction::Switch:
  case Instruction::Unreachable:
  case Instruction::Fence:
  case Instruction::AtomicRMW:
  case Instruction::AtomicCmpXchg:
  case Instruction::LandingPad:
  case Instruction::Resume:
  case Instruction::CatchSwitch:
  case Instruction::CatchPad:
  case Instruction::CatchRet:
  case Instruction::CleanupPad:
  case Instruction::CleanupRet:
    return false; // Misc instructions which have effects
  }
}
7039
7040
540M
// Returns true if \p I may have an ordering dependency on surrounding
// instructions beyond its SSA def-use edges (memory, exceptions, or
// guaranteed-progress constraints), and therefore cannot be freely reordered.
bool llvm::mayHaveNonDefUseDependency(const Instruction &I) {
  if (I.mayReadOrWriteMemory())
    // Memory dependency possible
    return true;
  if (!isSafeToSpeculativelyExecute(&I))
    // Can't move above a maythrow call or infinite loop.  Or if an
    // inalloca alloca, above a stacksave call.
    return true;
  if (!isGuaranteedToTransferExecutionToSuccessor(&I))
    // 1) Can't reorder two inf-loop calls, even if readonly
    // 2) Also can't reorder an inf-loop call below a instruction which isn't
    //    safe to speculative execute.  (Inverse of above)
    return true;
  return false;
}
7055
7056
/// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
  // One-to-one mapping between the two enums; the unreachable guards against
  // a new enumerator being added to ConstantRange::OverflowResult.
  switch (OR) {
    case ConstantRange::OverflowResult::MayOverflow:
      return OverflowResult::MayOverflow;
    case ConstantRange::OverflowResult::AlwaysOverflowsLow:
      return OverflowResult::AlwaysOverflowsLow;
    case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
      return OverflowResult::AlwaysOverflowsHigh;
    case ConstantRange::OverflowResult::NeverOverflows:
      return OverflowResult::NeverOverflows;
  }
  llvm_unreachable("Unknown OverflowResult");
}
7070
7071
/// Combine constant ranges from computeConstantRange() and computeKnownBits().
ConstantRange
llvm::computeConstantRangeIncludingKnownBits(const WithCache<const Value *> &V,
                                             bool ForSigned,
                                             const SimplifyQuery &SQ) {
  // Range implied by the known zero/one bits of V.
  ConstantRange CR1 =
      ConstantRange::fromKnownBits(V.getKnownBits(SQ), ForSigned);
  // Range from direct range analysis (e.g. !range metadata, instruction
  // semantics).
  ConstantRange CR2 = computeConstantRange(V, ForSigned, SQ.IIQ.UseInstrInfo);
  // Prefer the interpretation (signed vs unsigned) the caller asked for when
  // intersecting potentially wrapping ranges.
  ConstantRange::PreferredRangeType RangeType =
      ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
  return CR1.intersectWith(CR2, RangeType);
}
7083
7084
// Determines whether LHS * RHS can wrap in the unsigned sense. If IsNSW is
// set, the caller asserts the multiply carries the nsw flag, which enables
// an extra non-negative shortcut.
OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
                                                   const Value *RHS,
                                                   const SimplifyQuery &SQ,
                                                   bool IsNSW) {
  KnownBits LHSKnown = computeKnownBits(LHS, SQ);
  KnownBits RHSKnown = computeKnownBits(RHS, SQ);

  // mul nsw of two non-negative numbers is also nuw.
  if (IsNSW && LHSKnown.isNonNegative() && RHSKnown.isNonNegative())
    return OverflowResult::NeverOverflows;

  // Fall back to unsigned range arithmetic over the known-bits ranges.
  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
}
7099
7100
// Determines whether LHS * RHS can wrap in the signed sense, using sign-bit
// counting per "Hacker's Delight".
OverflowResult llvm::computeOverflowForSignedMul(const Value *LHS,
                                                 const Value *RHS,
                                                 const SimplifyQuery &SQ) {
  // Multiplying n * m significant bits yields a result of n + m significant
  // bits. If the total number of significant bits does not exceed the
  // result bit width (minus 1), there is no overflow.
  // This means if we have enough leading sign bits in the operands
  // we can guarantee that the result does not overflow.
  // Ref: "Hacker's Delight" by Henry Warren
  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();

  // Note that underestimating the number of sign bits gives a more
  // conservative answer.
  unsigned SignBits =
      ::ComputeNumSignBits(LHS, SQ) + ::ComputeNumSignBits(RHS, SQ);

  // First handle the easy case: if we have enough sign bits there's
  // definitely no overflow.
  if (SignBits > BitWidth + 1)
    return OverflowResult::NeverOverflows;

  // There are two ambiguous cases where there can be no overflow:
  //   SignBits == BitWidth + 1    and
  //   SignBits == BitWidth
  // The second case is difficult to check, therefore we only handle the
  // first case.
  if (SignBits == BitWidth + 1) {
    // It overflows only when both arguments are negative and the true
    // product is exactly the minimum negative number.
    // E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
    // For simplicity we just check if at least one side is not negative.
    KnownBits LHSKnown = computeKnownBits(LHS, SQ);
    KnownBits RHSKnown = computeKnownBits(RHS, SQ);
    if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
      return OverflowResult::NeverOverflows;
  }
  return OverflowResult::MayOverflow;
}
7138
7139
// Determines whether LHS + RHS can wrap in the unsigned sense by intersecting
// known-bits and constant-range information for both operands.
OverflowResult
llvm::computeOverflowForUnsignedAdd(const WithCache<const Value *> &LHS,
                                    const WithCache<const Value *> &RHS,
                                    const SimplifyQuery &SQ) {
  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
  return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
7149
7150
// Shared implementation for the signed-add overflow queries. \p Add may be
// null; when present it is the actual add instruction, which enables the
// nsw-flag shortcut and the context-sensitive sign check at the end.
static OverflowResult
computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
                            const WithCache<const Value *> &RHS,
                            const AddOperator *Add, const SimplifyQuery &SQ) {
  if (Add && Add->hasNoSignedWrap()) {
    return OverflowResult::NeverOverflows;
  }

  // If LHS and RHS each have at least two sign bits, the addition will look
  // like
  //
  // XX..... +
  // YY.....
  //
  // If the carry into the most significant position is 0, X and Y can't both
  // be 1 and therefore the carry out of the addition is also 0.
  //
  // If the carry into the most significant position is 1, X and Y can't both
  // be 0 and therefore the carry out of the addition is also 1.
  //
  // Since the carry into the most significant position is always equal to
  // the carry out of the addition, there is no signed overflow.
  if (::ComputeNumSignBits(LHS, SQ) > 1 && ::ComputeNumSignBits(RHS, SQ) > 1)
    return OverflowResult::NeverOverflows;

  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
  OverflowResult OR =
      mapOverflowResult(LHSRange.signedAddMayOverflow(RHSRange));
  if (OR != OverflowResult::MayOverflow)
    return OR;

  // The remaining code needs Add to be available. Early returns if not so.
  if (!Add)
    return OverflowResult::MayOverflow;

  // If the sign of Add is the same as at least one of the operands, this add
  // CANNOT overflow. If this can be determined from the known bits of the
  // operands the above signedAddMayOverflow() check will have already done so.
  // The only other way to improve on the known bits is from an assumption, so
  // call computeKnownBitsFromContext() directly.
  bool LHSOrRHSKnownNonNegative =
      (LHSRange.isAllNonNegative() || RHSRange.isAllNonNegative());
  bool LHSOrRHSKnownNegative =
      (LHSRange.isAllNegative() || RHSRange.isAllNegative());
  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
    KnownBits AddKnown(LHSRange.getBitWidth());
    computeKnownBitsFromContext(Add, AddKnown, SQ);
    if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
        (AddKnown.isNegative() && LHSOrRHSKnownNegative))
      return OverflowResult::NeverOverflows;
  }

  return OverflowResult::MayOverflow;
}
7207
7208
// Determines whether LHS - RHS can wrap in the unsigned sense, using pattern
// shortcuts, dominating conditions, and range analysis in that order.
OverflowResult llvm::computeOverflowForUnsignedSub(const Value *LHS,
                                                   const Value *RHS,
                                                   const SimplifyQuery &SQ) {
  // X - (X % ?)
  // The remainder of a value can't have greater magnitude than itself,
  // so the subtraction can't overflow.

  // X - (X -nuw ?)
  // In the minimal case, this would simplify to "?", so there's no subtract
  // at all. But if this analysis is used to peek through casts, for example,
  // then determining no-overflow may allow other transforms.

  // TODO: There are other patterns like this.
  //       See simplifyICmpWithBinOpOnLHS() for candidates.
  if (match(RHS, m_URem(m_Specific(LHS), m_Value())) ||
      match(RHS, m_NUWSub(m_Specific(LHS), m_Value())))
    // The undef guard is needed because undef could take different values in
    // the two uses of LHS matched above.
    if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
      return OverflowResult::NeverOverflows;

  // A dominating LHS >= RHS (or its negation) decides the question exactly.
  if (auto C = isImpliedByDomCondition(CmpInst::ICMP_UGE, LHS, RHS, SQ.CxtI,
                                       SQ.DL)) {
    if (*C)
      return OverflowResult::NeverOverflows;
    return OverflowResult::AlwaysOverflowsLow;
  }

  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/false, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/false, SQ);
  return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange));
}
7240
7241
// Determines whether LHS - RHS can wrap in the signed sense, using pattern
// shortcuts, sign-bit counting, and range analysis in that order.
OverflowResult llvm::computeOverflowForSignedSub(const Value *LHS,
                                                 const Value *RHS,
                                                 const SimplifyQuery &SQ) {
  // X - (X % ?)
  // The remainder of a value can't have greater magnitude than itself,
  // so the subtraction can't overflow.

  // X - (X -nsw ?)
  // In the minimal case, this would simplify to "?", so there's no subtract
  // at all. But if this analysis is used to peek through casts, for example,
  // then determining no-overflow may allow other transforms.
  if (match(RHS, m_SRem(m_Specific(LHS), m_Value())) ||
      match(RHS, m_NSWSub(m_Specific(LHS), m_Value())))
    // The undef guard is needed because undef could take different values in
    // the two uses of LHS matched above.
    if (isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
      return OverflowResult::NeverOverflows;

  // If LHS and RHS each have at least two sign bits, the subtraction
  // cannot overflow.
  if (::ComputeNumSignBits(LHS, SQ) > 1 && ::ComputeNumSignBits(RHS, SQ) > 1)
    return OverflowResult::NeverOverflows;

  ConstantRange LHSRange =
      computeConstantRangeIncludingKnownBits(LHS, /*ForSigned=*/true, SQ);
  ConstantRange RHSRange =
      computeConstantRangeIncludingKnownBits(RHS, /*ForSigned=*/true, SQ);
  return mapOverflowResult(LHSRange.signedSubMayOverflow(RHSRange));
}
7268
7269
// Returns true if every use of the math result (element 0) of the
// with.overflow intrinsic \p WO is dominated by a branch on its overflow bit
// (element 1) taking the no-overflow edge — i.e. the arithmetic provably does
// not wrap wherever its result is actually used.
bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
                                     const DominatorTree &DT) {
  SmallVector<const BranchInst *, 2> GuardingBranches;
  SmallVector<const ExtractValueInst *, 2> Results;

  // Partition the users: extracts of index 0 are the math results we must
  // prove guarded; branches on extracts of index 1 are candidate guards.
  for (const User *U : WO->users()) {
    if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
      assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");

      if (EVI->getIndices()[0] == 0)
        Results.push_back(EVI);
      else {
        assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");

        for (const auto *U : EVI->users())
          if (const auto *B = dyn_cast<BranchInst>(U)) {
            assert(B->isConditional() && "How else is it using an i1?");
            GuardingBranches.push_back(B);
          }
      }
    } else {
      // We are using the aggregate directly in a way we don't want to analyze
      // here (storing it to a global, say).
      return false;
    }
  }

  auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
    // Successor 1 is the false edge: overflow bit is false, i.e. no wrap.
    BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
    if (!NoWrapEdge.isSingleEdge())
      return false;

    // Check if all users of the add are provably no-wrap.
    for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, the we don't
      // need to check each use separately, since domination is transitive.
      if (DT.dominates(NoWrapEdge, Result->getParent()))
        continue;

      for (const auto &RU : Result->uses())
        if (!DT.dominates(NoWrapEdge, RU))
          return false;
    }

    return true;
  };

  // One fully-guarding branch suffices.
  return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
7318
7319
/// Shifts return poison if shiftwidth is larger than the bitwidth.
// Returns true only when the shift amount is a constant (scalar or
// fixed-vector) whose every element is provably less than the bit width.
static bool shiftAmountKnownInRange(const Value *ShiftAmount) {
  auto *C = dyn_cast<Constant>(ShiftAmount);
  if (!C)
    return false;

  // Shifts return poison if shiftwidth is larger than the bitwidth.
  SmallVector<const Constant *, 4> ShiftAmounts;
  if (auto *FVTy = dyn_cast<FixedVectorType>(C->getType())) {
    // Check each vector lane individually.
    unsigned NumElts = FVTy->getNumElements();
    for (unsigned i = 0; i < NumElts; ++i)
      ShiftAmounts.push_back(C->getAggregateElement(i));
  } else if (isa<ScalableVectorType>(C->getType()))
    return false; // Can't tell, just return false to be safe
  else
    ShiftAmounts.push_back(C);

  // Each element must be a ConstantInt strictly below its type's bit width;
  // null elements (from getAggregateElement) or non-integers fail.
  bool Safe = llvm::all_of(ShiftAmounts, [](const Constant *C) {
    auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && CI->getValue().ult(C->getType()->getIntegerBitWidth());
  });

  return Safe;
}
7343
7344
// Bitmask selecting which "bad value" kinds a query is concerned with.
// Used by includesPoison()/includesUndef() below.
enum class UndefPoisonKind {
  PoisonOnly = (1 << 0),   // only poison values
  UndefOnly = (1 << 1),    // only undef values
  UndefOrPoison = PoisonOnly | UndefOnly, // either
};
7349
7350
90.8M
// True if \p Kind covers poison (PoisonOnly or UndefOrPoison).
static bool includesPoison(UndefPoisonKind Kind) {
  return (static_cast<unsigned>(Kind) &
          static_cast<unsigned>(UndefPoisonKind::PoisonOnly)) != 0;
}
7353
7354
152M
// True if \p Kind covers undef (UndefOnly or UndefOrPoison).
static bool includesUndef(UndefPoisonKind Kind) {
  return (static_cast<unsigned>(Kind) &
          static_cast<unsigned>(UndefPoisonKind::UndefOnly)) != 0;
}
7357
7358
// Conservatively decides whether the operation \p Op can introduce undef or
// poison (per \p Kind) even when all of its operands are well-defined.
// Returning true means "possibly yes"; false is a proof of "no".
static bool canCreateUndefOrPoison(const Operator *Op, UndefPoisonKind Kind,
                                   bool ConsiderFlagsAndMetadata) {

  // Poison-generating flags/metadata (nuw/nsw, exact, !range, ...) can turn
  // an otherwise-safe op into a poison producer.
  if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
      Op->hasPoisonGeneratingAnnotations())
    return true;

  unsigned Opcode = Op->getOpcode();

  // Check whether opcode is a poison/undef-generating operation
  switch (Opcode) {
  case Instruction::Shl:
  case Instruction::AShr:
  case Instruction::LShr:
    // Out-of-range shift amounts yield poison.
    return includesPoison(Kind) && !shiftAmountKnownInRange(Op->getOperand(1));
  case Instruction::FPToSI:
  case Instruction::FPToUI:
    // fptosi/ui yields poison if the resulting value does not fit in the
    // destination type.
    return true;
  case Instruction::Call:
    if (auto *II = dyn_cast<IntrinsicInst>(Op)) {
      switch (II->getIntrinsicID()) {
      // TODO: Add more intrinsics.
      case Intrinsic::ctlz:
      case Intrinsic::cttz:
      case Intrinsic::abs:
        // Second operand is the is_zero_poison / is_int_min_poison flag.
        if (cast<ConstantInt>(II->getArgOperand(1))->isNullValue())
          return false;
        break;
      case Intrinsic::ctpop:
      case Intrinsic::bswap:
      case Intrinsic::bitreverse:
      case Intrinsic::fshl:
      case Intrinsic::fshr:
      case Intrinsic::smax:
      case Intrinsic::smin:
      case Intrinsic::umax:
      case Intrinsic::umin:
      case Intrinsic::ptrmask:
      case Intrinsic::fptoui_sat:
      case Intrinsic::fptosi_sat:
      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow:
      case Intrinsic::sadd_sat:
      case Intrinsic::uadd_sat:
      case Intrinsic::ssub_sat:
      case Intrinsic::usub_sat:
        return false;
      case Intrinsic::sshl_sat:
      case Intrinsic::ushl_sat:
        // Saturating shifts still poison on out-of-range shift amounts.
        return includesPoison(Kind) &&
               !shiftAmountKnownInRange(II->getArgOperand(1));
      case Intrinsic::fma:
      case Intrinsic::fmuladd:
      case Intrinsic::sqrt:
      case Intrinsic::powi:
      case Intrinsic::sin:
      case Intrinsic::cos:
      case Intrinsic::pow:
      case Intrinsic::log:
      case Intrinsic::log10:
      case Intrinsic::log2:
      case Intrinsic::exp:
      case Intrinsic::exp2:
      case Intrinsic::exp10:
      case Intrinsic::fabs:
      case Intrinsic::copysign:
      case Intrinsic::floor:
      case Intrinsic::ceil:
      case Intrinsic::trunc:
      case Intrinsic::rint:
      case Intrinsic::nearbyint:
      case Intrinsic::round:
      case Intrinsic::roundeven:
      case Intrinsic::fptrunc_round:
      case Intrinsic::canonicalize:
      case Intrinsic::arithmetic_fence:
      case Intrinsic::minnum:
      case Intrinsic::maxnum:
      case Intrinsic::minimum:
      case Intrinsic::maximum:
      case Intrinsic::minimumnum:
      case Intrinsic::maximumnum:
      case Intrinsic::is_fpclass:
      case Intrinsic::ldexp:
      case Intrinsic::frexp:
        return false;
      case Intrinsic::lround:
      case Intrinsic::llround:
      case Intrinsic::lrint:
      case Intrinsic::llrint:
        // If the value doesn't fit an unspecified value is returned (but this
        // is not poison).
        return false;
      }
    }
    [[fallthrough]];
  case Instruction::CallBr:
  case Instruction::Invoke: {
    // An arbitrary call may return undef/poison unless annotated noundef.
    const auto *CB = cast<CallBase>(Op);
    return !CB->hasRetAttr(Attribute::NoUndef);
  }
  case Instruction::InsertElement:
  case Instruction::ExtractElement: {
    // If index exceeds the length of the vector, it returns poison
    auto *VTy = cast<VectorType>(Op->getOperand(0)->getType());
    // Index is operand 2 for insertelement, operand 1 for extractelement.
    unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1;
    auto *Idx = dyn_cast<ConstantInt>(Op->getOperand(IdxOp));
    if (includesPoison(Kind))
      return !Idx ||
             Idx->getValue().uge(VTy->getElementCount().getKnownMinValue());
    return false;
  }
  case Instruction::ShuffleVector: {
    // A poison mask element produces a poison lane.
    ArrayRef<int> Mask = isa<ConstantExpr>(Op)
                             ? cast<ConstantExpr>(Op)->getShuffleMask()
                             : cast<ShuffleVectorInst>(Op)->getShuffleMask();
    return includesPoison(Kind) && is_contained(Mask, PoisonMaskElem);
  }
  case Instruction::FNeg:
  case Instruction::PHI:
  case Instruction::Select:
  case Instruction::ExtractValue:
  case Instruction::InsertValue:
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::GetElementPtr:
    // These only propagate operand badness; they never create it.
    return false;
  case Instruction::AddrSpaceCast:
    return true;
  default: {
    const auto *CE = dyn_cast<ConstantExpr>(Op);
    if (isa<CastInst>(Op) || (CE && CE->isCast()))
      return false;
    else if (Instruction::isBinaryOp(Opcode))
      return false;
    // Be conservative and return true.
    return true;
  }
  }
}
7505
7506
bool llvm::canCreateUndefOrPoison(const Operator *Op,
7507
483k
                                  bool ConsiderFlagsAndMetadata) {
7508
483k
  return ::canCreateUndefOrPoison(Op, UndefPoisonKind::UndefOrPoison,
7509
483k
                                  ConsiderFlagsAndMetadata);
7510
483k
}
7511
7512
12.4M
bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlagsAndMetadata) {
7513
12.4M
  return ::canCreateUndefOrPoison(Op, UndefPoisonKind::PoisonOnly,
7514
12.4M
                                  ConsiderFlagsAndMetadata);
7515
12.4M
}
7516
7517
// Returns true if "ValAssumedPoison is poison" directly forces V to be poison:
// either V is ValAssumedPoison itself, or ValAssumedPoison flows into V only
// through poison-propagating operands (bounded recursion), or both are
// extracts of the same with.overflow result.
static bool directlyImpliesPoison(const Value *ValAssumedPoison, const Value *V,
                                  unsigned Depth) {
  if (ValAssumedPoison == V)
    return true;

  const unsigned MaxDepth = 2;
  if (Depth >= MaxDepth)
    return false;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    // Some operand of I is (transitively) ValAssumedPoison and poison
    // propagates through that operand position.
    if (any_of(I->operands(), [=](const Use &Op) {
          return propagatesPoison(Op) &&
                 directlyImpliesPoison(ValAssumedPoison, Op, Depth + 1);
        }))
      return true;

    // V  = extractvalue V0, idx
    // V2 = extractvalue V0, idx2
    // V0's elements are all poison or not. (e.g., add_with_overflow)
    const WithOverflowInst *II;
    if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
        (match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
         llvm::is_contained(II->args(), ValAssumedPoison)))
      return true;
  }
  return false;
}
7544
7545
// Returns true if "ValAssumedPoison is poison" implies "V is poison".
// Beyond the direct check, if ValAssumedPoison is an instruction that cannot
// itself create poison, it suffices that every one of its operands implies
// poison of V (bounded recursion).
static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
                          unsigned Depth) {
  // If ValAssumedPoison can never be poison, the premise is vacuously true.
  if (isGuaranteedNotToBePoison(ValAssumedPoison))
    return true;

  if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
    return true;

  const unsigned MaxDepth = 2;
  if (Depth >= MaxDepth)
    return false;

  const auto *I = dyn_cast<Instruction>(ValAssumedPoison);
  if (I && !canCreatePoison(cast<Operator>(I))) {
    // I is poison only if one of its operands is; check each of them.
    return all_of(I->operands(), [=](const Value *Op) {
      return impliesPoison(Op, V, Depth + 1);
    });
  }
  return false;
}
7565
7566
6.98M
bool llvm::impliesPoison(const Value *ValAssumedPoison, const Value *V) {
7567
6.98M
  return ::impliesPoison(ValAssumedPoison, V, /* Depth */ 0);
7568
6.98M
}
7569
7570
static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
7571
7572
static bool isGuaranteedNotToBeUndefOrPoison(
7573
    const Value *V, AssumptionCache *AC, const Instruction *CtxI,
7574
162M
    const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
7575
162M
  if (Depth >= MaxAnalysisRecursionDepth)
7576
6.44M
    return false;
7577
7578
156M
  if (isa<MetadataAsValue>(V))
7579
0
    return false;
7580
7581
156M
  if (const auto *A = dyn_cast<Argument>(V)) {
7582
38.1M
    if (A->hasAttribute(Attribute::NoUndef) ||
7583
38.1M
        
A->hasAttribute(Attribute::Dereferenceable)5.18M
||
7584
38.1M
        
A->hasAttribute(Attribute::DereferenceableOrNull)5.18M
)
7585
32.9M
      return true;
7586
38.1M
  }
7587
7588
123M
  if (auto *C = dyn_cast<Constant>(V)) {
7589
29.7M
    if (isa<PoisonValue>(C))
7590
380k
      return !includesPoison(Kind);
7591
7592
29.3M
    if (isa<UndefValue>(C))
7593
518k
      return !includesUndef(Kind);
7594
7595
28.8M
    if (isa<ConstantInt>(C) || 
isa<GlobalVariable>(C)6.70M
||
isa<ConstantFP>(C)6.21M
||
7596
28.8M
        
isa<ConstantPointerNull>(C)5.98M
||
isa<Function>(C)2.06M
)
7597
28.1M
      return true;
7598
7599
760k
    if (C->getType()->isVectorTy()) {
7600
100k
      if (isa<ConstantExpr>(C)) {
7601
        // Scalable vectors can use a ConstantExpr to build a splat.
7602
1.40k
        if (Constant *SplatC = C->getSplatValue())
7603
0
          if (isa<ConstantInt>(SplatC) || isa<ConstantFP>(SplatC))
7604
0
            return true;
7605
99.2k
      } else {
7606
99.2k
        if (includesUndef(Kind) && 
C->containsUndefElement()11.5k
)
7607
2
          return false;
7608
99.2k
        if (includesPoison(Kind) && 
C->containsPoisonElement()94.3k
)
7609
392
          return false;
7610
98.8k
        return !C->containsConstantExpression();
7611
99.2k
      }
7612
100k
    }
7613
760k
  }
7614
7615
  // Strip cast operations from a pointer value.
7616
  // Note that stripPointerCastsSameRepresentation can strip off getelementptr
7617
  // inbounds with zero offset. To guarantee that the result isn't poison, the
7618
  // stripped pointer is checked as it has to be pointing into an allocated
7619
  // object or be null `null` to ensure `inbounds` getelement pointers with a
7620
  // zero offset could not produce poison.
7621
  // It can strip off addrspacecast that do not change bit representation as
7622
  // well. We believe that such addrspacecast is equivalent to no-op.
7623
94.4M
  auto *StrippedV = V->stripPointerCastsSameRepresentation();
7624
94.4M
  if (isa<AllocaInst>(StrippedV) || 
isa<GlobalVariable>(StrippedV)94.3M
||
7625
94.4M
      
isa<Function>(StrippedV)94.3M
||
isa<ConstantPointerNull>(StrippedV)94.3M
)
7626
94.8k
    return true;
7627
7628
94.3M
  auto OpCheck = [&](const Value *V) {
7629
57.4M
    return isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth + 1, Kind);
7630
57.4M
  };
7631
7632
94.3M
  if (auto *Opr = dyn_cast<Operator>(V)) {
7633
    // If the value is a freeze instruction, then it can never
7634
    // be undef or poison.
7635
89.1M
    if (isa<FreezeInst>(V))
7636
182k
      return true;
7637
7638
88.9M
    if (const auto *CB = dyn_cast<CallBase>(V)) {
7639
7.17M
      if (CB->hasRetAttr(Attribute::NoUndef) ||
7640
7.17M
          
CB->hasRetAttr(Attribute::Dereferenceable)6.06M
||
7641
7.17M
          
CB->hasRetAttr(Attribute::DereferenceableOrNull)6.06M
)
7642
1.10M
        return true;
7643
7.17M
    }
7644
7645
87.8M
    if (const auto *PN = dyn_cast<PHINode>(V)) {
7646
11.8M
      unsigned Num = PN->getNumIncomingValues();
7647
11.8M
      bool IsWellDefined = true;
7648
18.9M
      for (unsigned i = 0; i < Num; 
++i7.12M
) {
7649
17.3M
        if (PN == PN->getIncomingValue(i))
7650
84.9k
          continue;
7651
17.2M
        auto *TI = PN->getIncomingBlock(i)->getTerminator();
7652
17.2M
        if (!isGuaranteedNotToBeUndefOrPoison(PN->getIncomingValue(i), AC, TI,
7653
17.2M
                                              DT, Depth + 1, Kind)) {
7654
10.2M
          IsWellDefined = false;
7655
10.2M
          break;
7656
10.2M
        }
7657
17.2M
      }
7658
11.8M
      if (IsWellDefined)
7659
1.60M
        return true;
7660
76.0M
    } else if (!::canCreateUndefOrPoison(Opr, Kind,
7661
76.0M
                                         /*ConsiderFlagsAndMetadata*/ true) &&
7662
76.0M
               
all_of(Opr->operands(), OpCheck)46.6M
)
7663
9.14M
      return true;
7664
87.8M
  }
7665
7666
82.3M
  if (auto *I = dyn_cast<LoadInst>(V))
7667
21.2M
    if (I->hasMetadata(LLVMContext::MD_noundef) ||
7668
21.2M
        
I->hasMetadata(LLVMContext::MD_dereferenceable)17.9M
||
7669
21.2M
        
I->hasMetadata(LLVMContext::MD_dereferenceable_or_null)17.9M
)
7670
3.36M
      return true;
7671
7672
78.9M
  if (programUndefinedIfUndefOrPoison(V, !includesUndef(Kind)))
7673
6.47M
    return true;
7674
7675
  // CxtI may be null or a cloned instruction.
7676
72.5M
  if (!CtxI || 
!CtxI->getParent()20.4M
||
!DT20.4M
)
7677
63.2M
    return false;
7678
7679
9.22M
  auto *DNode = DT->getNode(CtxI->getParent());
7680
9.22M
  if (!DNode)
7681
    // Unreachable block
7682
260
    return false;
7683
7684
  // If V is used as a branch condition before reaching CtxI, V cannot be
7685
  // undef or poison.
7686
  //   br V, BB1, BB2
7687
  // BB1:
7688
  //   CtxI ; V cannot be undef or poison here
7689
9.22M
  auto *Dominator = DNode->getIDom();
7690
  // This check is purely for compile time reasons: we can skip the IDom walk
7691
  // if what we are checking for includes undef and the value is not an integer.
7692
9.22M
  if (!includesUndef(Kind) || 
V->getType()->isIntegerTy()3.52M
)
7693
106M
    
while (8.92M
Dominator) {
7694
97.9M
      auto *TI = Dominator->getBlock()->getTerminator();
7695
7696
97.9M
      Value *Cond = nullptr;
7697
97.9M
      if (auto BI = dyn_cast_or_null<BranchInst>(TI)) {
7698
73.2M
        if (BI->isConditional())
7699
61.3M
          Cond = BI->getCondition();
7700
73.2M
      } else 
if (auto 24.6M
SI24.6M
= dyn_cast_or_null<SwitchInst>(TI)) {
7701
2.82M
        Cond = SI->getCondition();
7702
2.82M
      }
7703
7704
97.9M
      if (Cond) {
7705
64.1M
        if (Cond == V)
7706
2.82k
          return true;
7707
64.1M
        else if (!includesUndef(Kind) && 
isa<Operator>(Cond)38.0M
) {
7708
          // For poison, we can analyze further
7709
37.5M
          auto *Opr = cast<Operator>(Cond);
7710
74.9M
          if (
any_of(Opr->operands(), [V](const Use &U) 37.5M
{
7711
74.9M
                return V == U && 
propagatesPoison(U)119k
;
7712
74.9M
              }))
7713
100k
            return true;
7714
37.5M
        }
7715
64.1M
      }
7716
7717
97.8M
      Dominator = Dominator->getIDom();
7718
97.8M
    }
7719
7720
9.11M
  if (AC && 
getKnowledgeValidInContext(V, {Attribute::NoUndef}, *AC, CtxI, DT)9.05M
)
7721
0
    return true;
7722
7723
9.11M
  return false;
7724
9.11M
}
7725
7726
bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC,
7727
                                            const Instruction *CtxI,
7728
                                            const DominatorTree *DT,
7729
11.7M
                                            unsigned Depth) {
7730
11.7M
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7731
11.7M
                                            UndefPoisonKind::UndefOrPoison);
7732
11.7M
}
7733
7734
bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
7735
                                     const Instruction *CtxI,
7736
57.6M
                                     const DominatorTree *DT, unsigned Depth) {
7737
57.6M
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7738
57.6M
                                            UndefPoisonKind::PoisonOnly);
7739
57.6M
}
7740
7741
bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
7742
                                    const Instruction *CtxI,
7743
18.8M
                                    const DominatorTree *DT, unsigned Depth) {
7744
18.8M
  return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth,
7745
18.8M
                                            UndefPoisonKind::UndefOnly);
7746
18.8M
}
7747
7748
/// Return true if undefined behavior would provably be executed on the path to
7749
/// OnPathTo if Root produced a posion result.  Note that this doesn't say
7750
/// anything about whether OnPathTo is actually executed or whether Root is
7751
/// actually poison.  This can be used to assess whether a new use of Root can
7752
/// be added at a location which is control equivalent with OnPathTo (such as
7753
/// immediately before it) without introducing UB which didn't previously
7754
/// exist.  Note that a false result conveys no information.
7755
bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
7756
                                         Instruction *OnPathTo,
7757
7.83k
                                         DominatorTree *DT) {
7758
  // Basic approach is to assume Root is poison, propagate poison forward
7759
  // through all users we can easily track, and then check whether any of those
7760
  // users are provable UB and must execute before out exiting block might
7761
  // exit.
7762
7763
  // The set of all recursive users we've visited (which are assumed to all be
7764
  // poison because of said visit)
7765
7.83k
  SmallSet<const Value *, 16> KnownPoison;
7766
7.83k
  SmallVector<const Instruction*, 16> Worklist;
7767
7.83k
  Worklist.push_back(Root);
7768
28.9k
  while (!Worklist.empty()) {
7769
27.3k
    const Instruction *I = Worklist.pop_back_val();
7770
7771
    // If we know this must trigger UB on a path leading our target.
7772
27.3k
    if (mustTriggerUB(I, KnownPoison) && 
DT->dominates(I, OnPathTo)9.55k
)
7773
6.29k
      return true;
7774
7775
    // If we can't analyze propagation through this instruction, just skip it
7776
    // and transitive users.  Safe as false is a conservative result.
7777
21.0k
    if (I != Root && 
!any_of(I->operands(), [&KnownPoison](const Use &U) 11.2k
{
7778
18.5k
          return KnownPoison.contains(U) && 
propagatesPoison(U)12.5k
;
7779
18.5k
        }))
7780
5.86k
      continue;
7781
7782
15.2k
    if (KnownPoison.insert(I).second)
7783
13.1k
      for (const User *User : I->users())
7784
28.6k
        Worklist.push_back(cast<Instruction>(User));
7785
15.2k
  }
7786
7787
  // Might be non-UB, or might have a path we couldn't prove must execute on
7788
  // way to exiting bb.
7789
1.53k
  return false;
7790
7.83k
}
7791
7792
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
                                                 const SimplifyQuery &SQ) {
  // Forward to the common implementation, passing the add itself as the
  // context instruction.
  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1),
                                       Add, SQ);
}
7797
7798
OverflowResult
llvm::computeOverflowForSignedAdd(const WithCache<const Value *> &LHS,
                                  const WithCache<const Value *> &RHS,
                                  const SimplifyQuery &SQ) {
  // No add instruction is available here, so pass a null context.
  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, SQ);
}
7804
7805
2.42G
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
7806
  // Note: An atomic operation isn't guaranteed to return in a reasonable amount
7807
  // of time because it's possible for another thread to interfere with it for an
7808
  // arbitrary length of time, but programs aren't allowed to rely on that.
7809
7810
  // If there is no successor, then execution can't transfer to it.
7811
2.42G
  if (isa<ReturnInst>(I))
7812
29.2M
    return false;
7813
2.39G
  if (isa<UnreachableInst>(I))
7814
5.46M
    return false;
7815
7816
  // Note: Do not add new checks here; instead, change Instruction::mayThrow or
7817
  // Instruction::willReturn.
7818
  //
7819
  // FIXME: Move this check into Instruction::willReturn.
7820
2.38G
  if (isa<CatchPadInst>(I)) {
7821
0
    switch (classifyEHPersonality(I->getFunction()->getPersonalityFn())) {
7822
0
    default:
7823
      // A catchpad may invoke exception object constructors and such, which
7824
      // in some languages can be arbitrary code, so be conservative by default.
7825
0
      return false;
7826
0
    case EHPersonality::CoreCLR:
7827
      // For CoreCLR, it just involves a type test.
7828
0
      return true;
7829
0
    }
7830
0
  }
7831
7832
  // An instruction that returns without throwing must transfer control flow
7833
  // to a successor.
7834
2.38G
  return !I->mayThrow() && 
I->willReturn()2.34G
;
7835
2.38G
}
7836
7837
11.5M
bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
7838
  // TODO: This is slightly conservative for invoke instruction since exiting
7839
  // via an exception *is* normal control for them.
7840
11.5M
  for (const Instruction &I : *BB)
7841
96.4M
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7842
3.20M
      return false;
7843
8.34M
  return true;
7844
11.5M
}
7845
7846
bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7847
   BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
7848
20.6M
   unsigned ScanLimit) {
7849
20.6M
  return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
7850
20.6M
                                                    ScanLimit);
7851
20.6M
}
7852
7853
bool llvm::isGuaranteedToTransferExecutionToSuccessor(
7854
98.3M
   iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
7855
98.3M
  assert(ScanLimit && "scan limit must be non-zero");
7856
190M
  for (const Instruction &I : Range) {
7857
190M
    if (--ScanLimit == 0)
7858
638k
      return false;
7859
190M
    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
7860
1.68M
      return false;
7861
190M
  }
7862
95.9M
  return true;
7863
98.3M
}
7864
7865
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
7866
0
                                                  const Loop *L) {
7867
  // The loop header is guaranteed to be executed for every iteration.
7868
  //
7869
  // FIXME: Relax this constraint to cover all basic blocks that are
7870
  // guaranteed to be executed at every iteration.
7871
0
  if (I->getParent() != L->getHeader()) return false;
7872
7873
0
  for (const Instruction &LI : *L->getHeader()) {
7874
0
    if (&LI == I) return true;
7875
0
    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false;
7876
0
  }
7877
0
  llvm_unreachable("Instruction not contained in its own parent basic block.");
7878
0
}
7879
7880
177M
bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) {
  switch (IID) {
  // TODO: Add more intrinsics.
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::umul_with_overflow:
    // If an input is a vector containing a poison element, the corresponding
    // lanes of both output vectors (calculated results, overflow bits) are
    // poison.
    return true;
  // Integer, bit-manipulation, saturating and fixed-point intrinsics, plus
  // the common math intrinsics: all propagate poison lane-wise.
  case Intrinsic::ctpop:
  case Intrinsic::ctlz:
  case Intrinsic::cttz:
  case Intrinsic::abs:
  case Intrinsic::smax:
  case Intrinsic::smin:
  case Intrinsic::umax:
  case Intrinsic::umin:
  case Intrinsic::scmp:
  case Intrinsic::is_fpclass:
  case Intrinsic::ptrmask:
  case Intrinsic::ucmp:
  case Intrinsic::bitreverse:
  case Intrinsic::bswap:
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::sshl_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::ushl_sat:
  case Intrinsic::smul_fix:
  case Intrinsic::smul_fix_sat:
  case Intrinsic::umul_fix:
  case Intrinsic::umul_fix_sat:
  case Intrinsic::pow:
  case Intrinsic::powi:
  case Intrinsic::sin:
  case Intrinsic::sinh:
  case Intrinsic::cos:
  case Intrinsic::cosh:
  case Intrinsic::sincos:
  case Intrinsic::sincospi:
  case Intrinsic::tan:
  case Intrinsic::tanh:
  case Intrinsic::asin:
  case Intrinsic::acos:
  case Intrinsic::atan:
  case Intrinsic::atan2:
  case Intrinsic::canonicalize:
  case Intrinsic::sqrt:
  case Intrinsic::exp:
  case Intrinsic::exp2:
  case Intrinsic::exp10:
  case Intrinsic::log:
  case Intrinsic::log2:
  case Intrinsic::log10:
  case Intrinsic::modf:
  case Intrinsic::floor:
  case Intrinsic::ceil:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::nearbyint:
  case Intrinsic::round:
  case Intrinsic::roundeven:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    return true;
  default:
    return false;
  }
}
7954
7955
136M
bool llvm::propagatesPoison(const Use &PoisonOp) {
7956
136M
  const Operator *I = cast<Operator>(PoisonOp.getUser());
7957
136M
  switch (I->getOpcode()) {
7958
4.81M
  case Instruction::Freeze:
7959
12.5M
  case Instruction::PHI:
7960
13.0M
  case Instruction::Invoke:
7961
13.0M
    return false;
7962
22.9M
  case Instruction::Select:
7963
22.9M
    return PoisonOp.getOperandNo() == 0;
7964
5.37M
  case Instruction::Call:
7965
5.37M
    if (auto *II = dyn_cast<IntrinsicInst>(I))
7966
3.25M
      return intrinsicPropagatesPoison(II->getIntrinsicID());
7967
2.11M
    return false;
7968
52.4M
  case Instruction::ICmp:
7969
53.8M
  case Instruction::FCmp:
7970
58.7M
  case Instruction::GetElementPtr:
7971
58.7M
    return true;
7972
36.1M
  default:
7973
36.1M
    if (isa<BinaryOperator>(I) || 
isa<UnaryOperator>(I)21.0M
||
isa<CastInst>(I)20.9M
)
7974
21.5M
      return true;
7975
7976
    // Be conservative and return false.
7977
14.5M
    return false;
7978
136M
  }
7979
136M
}
7980
7981
/// Enumerates all operands of \p I that are guaranteed to not be undef or
7982
/// poison. If the callback \p Handle returns true, stop processing and return
7983
/// true. Otherwise, return false.
7984
template <typename CallableT>
7985
static bool handleGuaranteedWellDefinedOps(const Instruction *I,
7986
721M
                                           const CallableT &Handle) {
7987
721M
  switch (I->getOpcode()) {
7988
33.9M
    case Instruction::Store:
7989
33.9M
      if (Handle(cast<StoreInst>(I)->getPointerOperand()))
7990
6.18M
        return true;
7991
27.7M
      break;
7992
7993
74.2M
    case Instruction::Load:
7994
74.2M
      if (Handle(cast<LoadInst>(I)->getPointerOperand()))
7995
18.5M
        return true;
7996
55.6M
      break;
7997
7998
    // Since dereferenceable attribute imply noundef, atomic operations
7999
    // also implicitly have noundef pointers too
8000
55.6M
    case Instruction::AtomicCmpXchg:
8001
121k
      if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
8002
712
        return true;
8003
121k
      break;
8004
8005
297k
    case Instruction::AtomicRMW:
8006
297k
      if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
8007
6.39k
        return true;
8008
291k
      break;
8009
8010
45.1M
    case Instruction::Call:
8011
48.5M
    case Instruction::Invoke: {
8012
48.5M
      const CallBase *CB = cast<CallBase>(I);
8013
48.5M
      if (CB->isIndirectCall() && 
Handle(CB->getCalledOperand())401k
)
8014
3
        return true;
8015
133M
      
for (unsigned i = 0; 48.5M
i < CB->arg_size();
++i84.8M
)
8016
87.9M
        if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
8017
87.9M
             
CB->paramHasAttr(i, Attribute::Dereferenceable)58.5M
||
8018
87.9M
             
CB->paramHasAttr(i, Attribute::DereferenceableOrNull)58.5M
) &&
8019
87.9M
            
Handle(CB->getArgOperand(i))29.3M
)
8020
3.09M
          return true;
8021
45.4M
      break;
8022
48.5M
    }
8023
45.4M
    case Instruction::Ret:
8024
4.75M
      if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
8025
4.75M
          
Handle(I->getOperand(0))2.53M
)
8026
62.1k
        return true;
8027
4.69M
      break;
8028
4.69M
    case Instruction::Switch:
8029
1.59M
      if (Handle(cast<SwitchInst>(I)->getCondition()))
8030
106k
        return true;
8031
1.48M
      break;
8032
96.7M
    case Instruction::Br: {
8033
96.7M
      auto *BR = cast<BranchInst>(I);
8034
96.7M
      if (BR->isConditional() && 
Handle(BR->getCondition())82.7M
)
8035
21.5M
        return true;
8036
75.2M
      break;
8037
96.7M
    }
8038
461M
    default:
8039
461M
      break;
8040
721M
  }
8041
8042
672M
  return false;
8043
721M
}
ValueTracking.cpp:bool handleGuaranteedWellDefinedOps<programUndefinedIfUndefOrPoison(llvm::Value const*, bool)::$_0>(llvm::Instruction const*, programUndefinedIfUndefOrPoison(llvm::Value const*, bool)::$_0 const&)
Line
Count
Source
7986
261M
                                           const CallableT &Handle) {
7987
261M
  switch (I->getOpcode()) {
7988
6.68M
    case Instruction::Store:
7989
6.68M
      if (Handle(cast<StoreInst>(I)->getPointerOperand()))
7990
259k
        return true;
7991
6.42M
      break;
7992
7993
17.4M
    case Instruction::Load:
7994
17.4M
      if (Handle(cast<LoadInst>(I)->getPointerOperand()))
7995
366k
        return true;
7996
17.1M
      break;
7997
7998
    // Since dereferenceable attribute imply noundef, atomic operations
7999
    // also implicitly have noundef pointers too
8000
17.1M
    case Instruction::AtomicCmpXchg:
8001
38.2k
      if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
8002
0
        return true;
8003
38.2k
      break;
8004
8005
38.2k
    case Instruction::AtomicRMW:
8006
2.62k
      if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
8007
1
        return true;
8008
2.62k
      break;
8009
8010
19.3M
    case Instruction::Call:
8011
20.6M
    case Instruction::Invoke: {
8012
20.6M
      const CallBase *CB = cast<CallBase>(I);
8013
20.6M
      if (CB->isIndirectCall() && 
Handle(CB->getCalledOperand())79.0k
)
8014
0
        return true;
8015
55.2M
      
for (unsigned i = 0; 20.6M
i < CB->arg_size();
++i34.6M
)
8016
34.7M
        if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
8017
34.7M
             
CB->paramHasAttr(i, Attribute::Dereferenceable)22.5M
||
8018
34.7M
             
CB->paramHasAttr(i, Attribute::DereferenceableOrNull)22.5M
) &&
8019
34.7M
            
Handle(CB->getArgOperand(i))12.1M
)
8020
37.5k
          return true;
8021
20.5M
      break;
8022
20.6M
    }
8023
20.5M
    case Instruction::Ret:
8024
3.22M
      if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
8025
3.22M
          
Handle(I->getOperand(0))1.89M
)
8026
176
        return true;
8027
3.22M
      break;
8028
3.22M
    case Instruction::Switch:
8029
516k
      if (Handle(cast<SwitchInst>(I)->getCondition()))
8030
44.6k
        return true;
8031
471k
      break;
8032
24.7M
    case Instruction::Br: {
8033
24.7M
      auto *BR = cast<BranchInst>(I);
8034
24.7M
      if (BR->isConditional() && 
Handle(BR->getCondition())19.7M
)
8035
221k
        return true;
8036
24.5M
      break;
8037
24.7M
    }
8038
188M
    default:
8039
188M
      break;
8040
261M
  }
8041
8042
260M
  return false;
8043
261M
}
ValueTracking.cpp:bool handleGuaranteedWellDefinedOps<llvm::mustTriggerUB(llvm::Instruction const*, llvm::SmallPtrSetImpl<llvm::Value const*> const&)::$_0>(llvm::Instruction const*, llvm::mustTriggerUB(llvm::Instruction const*, llvm::SmallPtrSetImpl<llvm::Value const*> const&)::$_0 const&)
Line
Count
Source
7986
460M
                                           const CallableT &Handle) {
7987
460M
  switch (I->getOpcode()) {
7988
27.2M
    case Instruction::Store:
7989
27.2M
      if (Handle(cast<StoreInst>(I)->getPointerOperand()))
7990
5.92M
        return true;
7991
21.3M
      break;
7992
7993
56.7M
    case Instruction::Load:
7994
56.7M
      if (Handle(cast<LoadInst>(I)->getPointerOperand()))
7995
18.2M
        return true;
7996
38.5M
      break;
7997
7998
    // Since dereferenceable attribute imply noundef, atomic operations
7999
    // also implicitly have noundef pointers too
8000
38.5M
    case Instruction::AtomicCmpXchg:
8001
83.6k
      if (Handle(cast<AtomicCmpXchgInst>(I)->getPointerOperand()))
8002
712
        return true;
8003
82.9k
      break;
8004
8005
294k
    case Instruction::AtomicRMW:
8006
294k
      if (Handle(cast<AtomicRMWInst>(I)->getPointerOperand()))
8007
6.39k
        return true;
8008
288k
      break;
8009
8010
25.7M
    case Instruction::Call:
8011
27.9M
    case Instruction::Invoke: {
8012
27.9M
      const CallBase *CB = cast<CallBase>(I);
8013
27.9M
      if (CB->isIndirectCall() && 
Handle(CB->getCalledOperand())322k
)
8014
3
        return true;
8015
78.1M
      
for (unsigned i = 0; 27.9M
i < CB->arg_size();
++i50.1M
)
8016
53.2M
        if ((CB->paramHasAttr(i, Attribute::NoUndef) ||
8017
53.2M
             
CB->paramHasAttr(i, Attribute::Dereferenceable)36.0M
||
8018
53.2M
             
CB->paramHasAttr(i, Attribute::DereferenceableOrNull)36.0M
) &&
8019
53.2M
            
Handle(CB->getArgOperand(i))17.1M
)
8020
3.06M
          return true;
8021
24.8M
      break;
8022
27.9M
    }
8023
24.8M
    case Instruction::Ret:
8024
1.52M
      if (I->getFunction()->hasRetAttribute(Attribute::NoUndef) &&
8025
1.52M
          
Handle(I->getOperand(0))645k
)
8026
61.9k
        return true;
8027
1.46M
      break;
8028
1.46M
    case Instruction::Switch:
8029
1.07M
      if (Handle(cast<SwitchInst>(I)->getCondition()))
8030
61.8k
        return true;
8031
1.01M
      break;
8032
71.9M
    case Instruction::Br: {
8033
71.9M
      auto *BR = cast<BranchInst>(I);
8034
71.9M
      if (BR->isConditional() && 
Handle(BR->getCondition())63.0M
)
8035
21.2M
        return true;
8036
50.6M
      break;
8037
71.9M
    }
8038
273M
    default:
8039
273M
      break;
8040
460M
  }
8041
8042
411M
  return false;
8043
460M
}
8044
8045
/// Enumerates all operands of \p I that are guaranteed to not be poison.
8046
template <typename CallableT>
8047
static bool handleGuaranteedNonPoisonOps(const Instruction *I,
8048
460M
                                         const CallableT &Handle) {
8049
460M
  if (handleGuaranteedWellDefinedOps(I, Handle))
8050
48.6M
    return true;
8051
411M
  switch (I->getOpcode()) {
8052
  // Divisors of these operations are allowed to be partially undef.
8053
218k
  case Instruction::UDiv:
8054
971k
  case Instruction::SDiv:
8055
1.31M
  case Instruction::URem:
8056
1.46M
  case Instruction::SRem:
8057
1.46M
    return Handle(I->getOperand(1));
8058
410M
  default:
8059
410M
    return false;
8060
411M
  }
8061
411M
}
8062
8063
bool llvm::mustTriggerUB(const Instruction *I,
8064
460M
                         const SmallPtrSetImpl<const Value *> &KnownPoison) {
8065
460M
  return handleGuaranteedNonPoisonOps(
8066
460M
      I, [&](const Value *V) 
{ return KnownPoison.count(V); }168M
);
8067
460M
}
8068
8069
static bool programUndefinedIfUndefOrPoison(const Value *V,
8070
136M
                                            bool PoisonOnly) {
8071
  // We currently only look for uses of values within the same basic
8072
  // block, as that makes it easier to guarantee that the uses will be
8073
  // executed given that Inst is executed.
8074
  //
8075
  // FIXME: Expand this to consider uses beyond the same basic block. To do
8076
  // this, look out for the distinction between post-dominance and strong
8077
  // post-dominance.
8078
136M
  const BasicBlock *BB = nullptr;
8079
136M
  BasicBlock::const_iterator Begin;
8080
136M
  if (const auto *Inst = dyn_cast<Instruction>(V)) {
8081
130M
    BB = Inst->getParent();
8082
130M
    Begin = Inst->getIterator();
8083
130M
    Begin++;
8084
130M
  } else 
if (const auto *5.39M
Arg5.39M
= dyn_cast<Argument>(V)) {
8085
5.18M
    if (Arg->getParent()->isDeclaration())
8086
0
      return false;
8087
5.18M
    BB = &Arg->getParent()->getEntryBlock();
8088
5.18M
    Begin = BB->begin();
8089
5.18M
  } else {
8090
212k
    return false;
8091
212k
  }
8092
8093
  // Limit number of instructions we look at, to avoid scanning through large
8094
  // blocks. The current limit is chosen arbitrarily.
8095
135M
  unsigned ScanLimit = 32;
8096
135M
  BasicBlock::const_iterator End = BB->end();
8097
8098
135M
  if (!PoisonOnly) {
8099
    // Since undef does not propagate eagerly, be conservative & just check
8100
    // whether a value is directly passed to an instruction that must take
8101
    // well-defined operands.
8102
8103
262M
    for (const auto &I : make_range(Begin, End)) {
8104
262M
      if (--ScanLimit == 0)
8105
1.26M
        break;
8106
8107
261M
      if (handleGuaranteedWellDefinedOps(&I, [V](const Value *WellDefinedOp) {
8108
58.6M
            return WellDefinedOp == V;
8109
58.6M
          }))
8110
929k
        return true;
8111
8112
260M
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8113
8.42M
        break;
8114
260M
    }
8115
34.7M
    return false;
8116
35.6M
  }
8117
8118
  // Set of instructions that we have proved will yield poison if Inst
8119
  // does.
8120
100M
  SmallSet<const Value *, 16> YieldsPoison;
8121
100M
  SmallSet<const BasicBlock *, 4> Visited;
8122
8123
100M
  YieldsPoison.insert(V);
8124
100M
  Visited.insert(BB);
8125
8126
109M
  while (true) {
8127
456M
    for (const auto &I : make_range(Begin, End)) {
8128
456M
      if (--ScanLimit == 0)
8129
1.68M
        return false;
8130
455M
      if (mustTriggerUB(&I, YieldsPoison))
8131
47.7M
        return true;
8132
407M
      if (!isGuaranteedToTransferExecutionToSuccessor(&I))
8133
7.37M
        return false;
8134
8135
      // If an operand is poison and propagates it, mark I as yielding poison.
8136
963M
      
for (const Use &Op : I.operands())400M
{
8137
963M
        if (YieldsPoison.count(Op) && 
propagatesPoison(Op)88.2M
) {
8138
61.0M
          YieldsPoison.insert(&I);
8139
61.0M
          break;
8140
61.0M
        }
8141
963M
      }
8142
8143
      // Special handling for select, which returns poison if its operand 0 is
8144
      // poison (handled in the loop above) *or* if both its true/false operands
8145
      // are poison (handled here).
8146
400M
      if (I.getOpcode() == Instruction::Select &&
8147
400M
          
YieldsPoison.count(I.getOperand(1))28.3M
&&
8148
400M
          
YieldsPoison.count(I.getOperand(2))9.43M
) {
8149
92.7k
        YieldsPoison.insert(&I);
8150
92.7k
      }
8151
400M
    }
8152
8153
52.2M
    BB = BB->getSingleSuccessor();
8154
52.2M
    if (!BB || 
!Visited.insert(BB).second8.92M
)
8155
43.2M
      break;
8156
8157
8.92M
    Begin = BB->getFirstNonPHIIt();
8158
8.92M
    End = BB->end();
8159
8.92M
  }
8160
43.2M
  return false;
8161
100M
}
8162
8163
0
bool llvm::programUndefinedIfUndefOrPoison(const Instruction *Inst) {
8164
0
  return ::programUndefinedIfUndefOrPoison(Inst, false);
8165
0
}
8166
8167
57.0M
bool llvm::programUndefinedIfPoison(const Instruction *Inst) {
8168
57.0M
  return ::programUndefinedIfUndefOrPoison(Inst, true);
8169
57.0M
}
8170
8171
4.62M
static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
8172
4.62M
  if (FMF.noNaNs())
8173
210k
    return true;
8174
8175
4.41M
  if (auto *C = dyn_cast<ConstantFP>(V))
8176
893k
    return !C->isNaN();
8177
8178
3.51M
  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8179
4.95k
    if (!C->getElementType()->isFloatingPointTy())
8180
0
      return false;
8181
30.9k
    
for (unsigned I = 0, E = C->getNumElements(); 4.95k
I < E;
++I25.9k
) {
8182
25.9k
      if (C->getElementAsAPFloat(I).isNaN())
8183
0
        return false;
8184
25.9k
    }
8185
4.95k
    return true;
8186
4.95k
  }
8187
8188
3.51M
  if (isa<ConstantAggregateZero>(V))
8189
8.55k
    return true;
8190
8191
3.50M
  return false;
8192
3.51M
}
8193
8194
508k
static bool isKnownNonZero(const Value *V) {
8195
508k
  if (auto *C = dyn_cast<ConstantFP>(V))
8196
113k
    return !C->isZero();
8197
8198
394k
  if (auto *C = dyn_cast<ConstantDataVector>(V)) {
8199
14.8k
    if (!C->getElementType()->isFloatingPointTy())
8200
0
      return false;
8201
101k
    
for (unsigned I = 0, E = C->getNumElements(); 14.8k
I < E;
++I86.1k
) {
8202
86.1k
      if (C->getElementAsAPFloat(I).isZero())
8203
0
        return false;
8204
86.1k
    }
8205
14.8k
    return true;
8206
14.8k
  }
8207
8208
379k
  return false;
8209
394k
}
8210
8211
/// Match clamp pattern for float types without care about NaNs or signed zeros.
8212
/// Given non-min/max outer cmp/select from the clamp pattern this
8213
/// function recognizes if it can be substitued by a "canonical" min/max
8214
/// pattern.
8215
static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
                                               Value *&LHS, Value *&RHS) {
  // Try to match
  //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
  //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
  // and return description of the outer Max/Min.

  // First, check if select has inverse order:
  if (CmpRHS == FalseVal) {
    std::swap(TrueVal, FalseVal);
    Pred = CmpInst::getInversePredicate(Pred);
  }

  // Assume success now. If there's no match, callers should not use these anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  // The outer bound C1 must be the compared-against constant, and it must be
  // finite (clamping against inf/NaN does not form a well-defined clamp).
  const APFloat *FC1;
  if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // The false arm must be an inner min/max against a second constant C2, and
  // the two bounds must be strictly ordered for the clamp to be canonical.
  const APFloat *FC2;
  switch (Pred) {
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (match(FalseVal, m_OrdOrUnordFMin(m_Specific(CmpLHS), m_APFloat(FC2))) &&
        *FC1 < *FC2)
      return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
    break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_UGE:
    if (match(FalseVal, m_OrdOrUnordFMax(m_Specific(CmpLHS), m_APFloat(FC2))) &&
        *FC1 > *FC2)
      return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
    break;
  default:
    break;
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}
8262
8263
/// Recognize variations of:
8264
///   CLAMP(v,l,h) ==> ((v) < (l) ? (l) : ((v) > (h) ? (h) : (v)))
8265
static SelectPatternResult matchClamp(CmpInst::Predicate Pred,
                                      Value *CmpLHS, Value *CmpRHS,
                                      Value *TrueVal, Value *FalseVal) {
  // Swap the select operands and predicate to match the patterns below.
  if (CmpRHS != TrueVal) {
    Pred = ICmpInst::getSwappedPredicate(Pred);
    std::swap(TrueVal, FalseVal);
  }
  // After canonicalization the compared-against constant C1 must be the true
  // arm; the false arm is then checked for an inner min/max against C2.
  const APInt *C1;
  if (CmpRHS == TrueVal && match(CmpRHS, m_APInt(C1))) {
    const APInt *C2;
    // In each case C1 must be strictly inside the range the inner min/max
    // establishes, otherwise the outer select is not a genuine clamp.

    // (X <s C1) ? C1 : SMIN(X, C2) ==> SMAX(SMIN(X, C2), C1)
    if (match(FalseVal, m_SMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->slt(*C2) && Pred == CmpInst::ICMP_SLT)
      return {SPF_SMAX, SPNB_NA, false};

    // (X >s C1) ? C1 : SMAX(X, C2) ==> SMIN(SMAX(X, C2), C1)
    if (match(FalseVal, m_SMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->sgt(*C2) && Pred == CmpInst::ICMP_SGT)
      return {SPF_SMIN, SPNB_NA, false};

    // (X <u C1) ? C1 : UMIN(X, C2) ==> UMAX(UMIN(X, C2), C1)
    if (match(FalseVal, m_UMin(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ult(*C2) && Pred == CmpInst::ICMP_ULT)
      return {SPF_UMAX, SPNB_NA, false};

    // (X >u C1) ? C1 : UMAX(X, C2) ==> UMIN(UMAX(X, C2), C1)
    if (match(FalseVal, m_UMax(m_Specific(CmpLHS), m_APInt(C2))) &&
        C1->ugt(*C2) && Pred == CmpInst::ICMP_UGT)
      return {SPF_UMIN, SPNB_NA, false};
  }
  return {SPF_UNKNOWN, SPNB_NA, false};
}
8298
8299
/// Recognize variations of:
8300
///   a < c ? min(a,b) : min(b,c) ==> min(min(a,b),min(b,c))
8301
static SelectPatternResult matchMinMaxOfMinMax(CmpInst::Predicate Pred,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TVal, Value *FVal,
                                               unsigned Depth) {
  // TODO: Allow FP min/max with nnan/nsz.
  assert(CmpInst::isIntPredicate(Pred) && "Expected integer comparison");

  // Both select arms must themselves be min/max patterns of the same flavor:
  //   TVal = m(A, B), FVal = m(C, D).
  Value *A = nullptr, *B = nullptr;
  SelectPatternResult L = matchSelectPattern(TVal, A, B, nullptr, Depth + 1);
  if (!SelectPatternResult::isMinOrMax(L.Flavor))
    return {SPF_UNKNOWN, SPNB_NA, false};

  Value *C = nullptr, *D = nullptr;
  SelectPatternResult R = matchSelectPattern(FVal, C, D, nullptr, Depth + 1);
  if (L.Flavor != R.Flavor)
    return {SPF_UNKNOWN, SPNB_NA, false};

  // We have something like: x Pred y ? min(a, b) : min(c, d).
  // Try to match the compare to the min/max operations of the select operands.
  // First, make sure we have the right compare predicate. The compare is
  // canonicalized (swapping operands as needed) to point in the same
  // direction as the inner min/max flavor.
  switch (L.Flavor) {
  case SPF_SMIN:
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_SMAX:
    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMIN:
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  case SPF_UMAX:
    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
      Pred = ICmpInst::getSwappedPredicate(Pred);
      std::swap(CmpLHS, CmpRHS);
    }
    if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
      break;
    return {SPF_UNKNOWN, SPNB_NA, false};
  default:
    return {SPF_UNKNOWN, SPNB_NA, false};
  }

  // If there is a common operand in the already matched min/max and the other
  // min/max operands match the compare operands (either directly or inverted),
  // then this is min/max of the same flavor.

  // a pred c ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  // ~c pred ~a ? m(a, b) : m(c, b) --> m(m(a, b), m(c, b))
  if (D == B) {
    if ((CmpLHS == A && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                         match(A, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // a pred d ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  // ~d pred ~a ? m(a, b) : m(b, d) --> m(m(a, b), m(b, d))
  if (C == B) {
    if ((CmpLHS == A && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                         match(A, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // b pred c ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  // ~c pred ~b ? m(a, b) : m(c, a) --> m(m(a, b), m(c, a))
  if (D == A) {
    if ((CmpLHS == B && CmpRHS == C) || (match(C, m_Not(m_Specific(CmpLHS))) &&
                                         match(B, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }
  // b pred d ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  // ~d pred ~b ? m(a, b) : m(a, d) --> m(m(a, b), m(a, d))
  if (C == A) {
    if ((CmpLHS == B && CmpRHS == D) || (match(D, m_Not(m_Specific(CmpLHS))) &&
                                         match(B, m_Not(m_Specific(CmpRHS)))))
      return {L.Flavor, SPNB_NA, false};
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}
8393
8394
/// If the input value is the result of a 'not' op, constant integer, or vector
8395
/// splat of a constant integer, return the bitwise-not source value.
8396
/// TODO: This could be extended to handle non-splat vector integer constants.
8397
59.5M
static Value *getNotValue(Value *V) {
  // Direct 'not' (xor with -1): return the un-inverted operand.
  Value *Inner;
  if (match(V, m_Not(m_Value(Inner))))
    return Inner;

  // Integer constant (or splat thereof): materialize its bitwise complement
  // as a new constant of the same type.
  const APInt *CV;
  if (match(V, m_APInt(CV)))
    return ConstantInt::get(V->getType(), ~(*CV));

  // Not recognizably a bitwise-not of anything.
  return nullptr;
}
8408
8409
/// Match non-obvious integer minimum and maximum sequences.
8410
static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                       Value *CmpLHS, Value *CmpRHS,
                                       Value *TrueVal, Value *FalseVal,
                                       Value *&LHS, Value *&RHS,
                                       unsigned Depth) {
  // Assume success. If there's no match, callers should not use these anyway.
  LHS = TrueVal;
  RHS = FalseVal;

  // Try the specialized matchers first: clamp, then min/max-of-min/max.
  SelectPatternResult SPR = matchClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  SPR = matchMinMaxOfMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, Depth);
  if (SPR.Flavor != SelectPatternFlavor::SPF_UNKNOWN)
    return SPR;

  // Look through 'not' ops to find disguised min/max.
  // (X > Y) ? ~X : ~Y ==> (~X < ~Y) ? ~X : ~Y ==> MIN(~X, ~Y)
  // (X < Y) ? ~X : ~Y ==> (~X > ~Y) ? ~X : ~Y ==> MAX(~X, ~Y)
  if (CmpLHS == getNotValue(TrueVal) && CmpRHS == getNotValue(FalseVal)) {
    switch (Pred) {
    case CmpInst::ICMP_SGT: return {SPF_SMIN, SPNB_NA, false};
    case CmpInst::ICMP_SLT: return {SPF_SMAX, SPNB_NA, false};
    case CmpInst::ICMP_UGT: return {SPF_UMIN, SPNB_NA, false};
    case CmpInst::ICMP_ULT: return {SPF_UMAX, SPNB_NA, false};
    default: break;
    }
  }

  // (X > Y) ? ~Y : ~X ==> (~X < ~Y) ? ~Y : ~X ==> MAX(~Y, ~X)
  // (X < Y) ? ~Y : ~X ==> (~X > ~Y) ? ~Y : ~X ==> MIN(~Y, ~X)
  if (CmpLHS == getNotValue(FalseVal) && CmpRHS == getNotValue(TrueVal)) {
    switch (Pred) {
    case CmpInst::ICMP_SGT: return {SPF_SMAX, SPNB_NA, false};
    case CmpInst::ICMP_SLT: return {SPF_SMIN, SPNB_NA, false};
    case CmpInst::ICMP_UGT: return {SPF_UMAX, SPNB_NA, false};
    case CmpInst::ICMP_ULT: return {SPF_UMIN, SPNB_NA, false};
    default: break;
    }
  }

  // The remaining patterns only arise from signed compares.
  if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
    return {SPF_UNKNOWN, SPNB_NA, false};

  const APInt *C1;
  if (!match(CmpRHS, m_APInt(C1)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  // An unsigned min/max can be written with a signed compare.
  const APInt *C2;
  if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
      (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
    // Is the sign bit set?
    // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
    // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
    if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
      return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};

    // Is the sign bit clear?
    // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
    // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
    if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
      return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
  }

  return {SPF_UNKNOWN, SPNB_NA, false};
}
8478
8479
bool llvm::isKnownNegation(const Value *X, const Value *Y, bool NeedNSW,
                           bool AllowPoison) {
  assert(X && Y && "Invalid operand");

  // Returns true if X is (C - Y) for some constant C that qualifies as a
  // negation under the NeedNSW/AllowPoison constraints.
  auto IsNegationOf = [&](const Value *X, const Value *Y) {
    if (!match(X, m_Neg(m_Specific(Y))))
      return false;

    auto *BO = cast<BinaryOperator>(X);
    // If the caller needs nsw-negation, the sub must carry the nsw flag.
    if (NeedNSW && !BO->hasNoSignedWrap())
      return false;

    // m_Neg also matches (undef/poison - Y); only accept a true zero LHS
    // unless the caller tolerates poison.
    auto *Zero = cast<Constant>(BO->getOperand(0));
    if (!AllowPoison && !Zero->isNullValue())
      return false;

    return true;
  };

  // X = -Y or Y = -X
  if (IsNegationOf(X, Y) || IsNegationOf(Y, X))
    return true;

  // X = sub (A, B), Y = sub (B, A) || X = sub nsw (A, B), Y = sub nsw (B, A)
  Value *A, *B;
  return (!NeedNSW && (match(X, m_Sub(m_Value(A), m_Value(B))) &&
                        match(Y, m_Sub(m_Specific(B), m_Specific(A))))) ||
         (NeedNSW && (match(X, m_NSWSub(m_Value(A), m_Value(B))) &&
                       match(Y, m_NSWSub(m_Specific(B), m_Specific(A)))));
}
8509
8510
6.07M
bool llvm::isKnownInversion(const Value *X, const Value *Y) {
  // Handle X = icmp pred A, B, Y = icmp pred A, C.
  Value *A, *B, *C;
  CmpPredicate Pred1, Pred2;
  if (!match(X, m_ICmp(Pred1, m_Value(A), m_Value(B))) ||
      !match(Y, m_c_ICmp(Pred2, m_Specific(A), m_Value(C))))
    return false;

  // They must both have samesign flag or not.
  if (Pred1.hasSameSign() != Pred2.hasSameSign())
    return false;

  // Same RHS: inversion holds iff the predicates are exact inverses.
  if (B == C)
    return Pred1 == ICmpInst::getInversePredicate(Pred2);

  // Try to infer the relationship from constant ranges.
  const APInt *RHSC1, *RHSC2;
  if (!match(B, m_APInt(RHSC1)) || !match(C, m_APInt(RHSC2)))
    return false;

  // Sign bits of two RHSCs should match.
  if (Pred1.hasSameSign() && RHSC1->isNonNegative() != RHSC2->isNonNegative())
    return false;

  // X and Y are inversions iff the set of A-values satisfying X is exactly
  // the complement of the set satisfying Y.
  const auto CR1 = ConstantRange::makeExactICmpRegion(Pred1, *RHSC1);
  const auto CR2 = ConstantRange::makeExactICmpRegion(Pred2, *RHSC2);

  return CR1.inverse() == CR2;
}
8539
8540
SelectPatternResult llvm::getSelectPattern(CmpInst::Predicate Pred,
                                           SelectPatternNaNBehavior NaNBehavior,
                                           bool Ordered) {
  // Map a (possibly swapped/canonicalized) compare predicate directly to the
  // select-pattern flavor of "(cmp X, Y) ? X : Y". NaNBehavior/Ordered are
  // threaded through unchanged for the FP flavors.
  switch (Pred) {
  default:
    return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    return {SPF_UMAX, SPNB_NA, false};
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    return {SPF_SMAX, SPNB_NA, false};
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    return {SPF_UMIN, SPNB_NA, false};
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    return {SPF_SMIN, SPNB_NA, false};
  case FCmpInst::FCMP_UGT:
  case FCmpInst::FCMP_UGE:
  case FCmpInst::FCMP_OGT:
  case FCmpInst::FCMP_OGE:
    return {SPF_FMAXNUM, NaNBehavior, Ordered};
  case FCmpInst::FCMP_ULT:
  case FCmpInst::FCMP_ULE:
  case FCmpInst::FCMP_OLT:
  case FCmpInst::FCMP_OLE:
    return {SPF_FMINNUM, NaNBehavior, Ordered};
  }
}
8570
8571
// Convert a strict relational compare against C into the equivalent non-strict
// compare against C+1/C-1 (or vice versa), returning the flipped predicate and
// adjusted constant, or std::nullopt when the adjustment would wrap.
std::optional<std::pair<CmpPredicate, Constant *>>
llvm::getFlippedStrictnessPredicateAndConstant(CmpPredicate Pred, Constant *C) {
  assert(ICmpInst::isRelational(Pred) && ICmpInst::isIntPredicate(Pred) &&
         "Only for relational integer predicates.");
  if (isa<UndefValue>(C))
    return std::nullopt;

  Type *Type = C->getType();
  bool IsSigned = ICmpInst::isSigned(Pred);

  // ule->ult and ugt->uge require incrementing C; the others decrement.
  CmpInst::Predicate UnsignedPred = ICmpInst::getUnsignedPredicate(Pred);
  bool WillIncrement =
      UnsignedPred == ICmpInst::ICMP_ULE || UnsignedPred == ICmpInst::ICMP_UGT;

  // Check if the constant operand can be safely incremented/decremented
  // without overflowing/underflowing.
  auto ConstantIsOk = [WillIncrement, IsSigned](ConstantInt *C) {
    return WillIncrement ? !C->isMaxValue(IsSigned) : !C->isMinValue(IsSigned);
  };

  Constant *SafeReplacementConstant = nullptr;
  if (auto *CI = dyn_cast<ConstantInt>(C)) {
    // Bail out if the constant can't be safely incremented/decremented.
    if (!ConstantIsOk(CI))
      return std::nullopt;
  } else if (auto *FVTy = dyn_cast<FixedVectorType>(Type)) {
    // Fixed vector: every defined lane must be adjustable.
    unsigned NumElts = FVTy->getNumElements();
    for (unsigned i = 0; i != NumElts; ++i) {
      Constant *Elt = C->getAggregateElement(i);
      if (!Elt)
        return std::nullopt;

      if (isa<UndefValue>(Elt))
        continue;

      // Bail out if we can't determine if this constant is min/max or if we
      // know that this constant is min/max.
      auto *CI = dyn_cast<ConstantInt>(Elt);
      if (!CI || !ConstantIsOk(CI))
        return std::nullopt;

      if (!SafeReplacementConstant)
        SafeReplacementConstant = CI;
    }
  } else if (isa<VectorType>(C->getType())) {
    // Handle scalable splat
    Value *SplatC = C->getSplatValue();
    auto *CI = dyn_cast_or_null<ConstantInt>(SplatC);
    // Bail out if the constant can't be safely incremented/decremented.
    if (!CI || !ConstantIsOk(CI))
      return std::nullopt;
  } else {
    // ConstantExpr?
    return std::nullopt;
  }

  // It may not be safe to change a compare predicate in the presence of
  // undefined elements, so replace those elements with the first safe constant
  // that we found.
  // TODO: in case of poison, it is safe; let's replace undefs only.
  if (C->containsUndefOrPoisonElement()) {
    assert(SafeReplacementConstant && "Replacement constant not set");
    C = Constant::replaceUndefsWith(C, SafeReplacementConstant);
  }

  CmpInst::Predicate NewPred = CmpInst::getFlippedStrictnessPredicate(Pred);

  // Increment or decrement the constant.
  Constant *OneOrNegOne = ConstantInt::get(Type, WillIncrement ? 1 : -1, true);
  Constant *NewC = ConstantExpr::getAdd(C, OneOrNegOne);

  return std::make_pair(NewPred, NewC);
}
8644
8645
// Core pattern matcher for select(cmp(...)): classifies min/max/abs/nabs
// flavors, tracking NaN and signed-zero behavior for FP compares.
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
                                              FastMathFlags FMF,
                                              Value *CmpLHS, Value *CmpRHS,
                                              Value *TrueVal, Value *FalseVal,
                                              Value *&LHS, Value *&RHS,
                                              unsigned Depth) {
  bool HasMismatchedZeros = false;
  if (CmpInst::isFPPredicate(Pred)) {
    // IEEE-754 ignores the sign of 0.0 in comparisons. So if the select has one
    // 0.0 operand, set the compare's 0.0 operands to that same value for the
    // purpose of identifying min/max. Disregard vector constants with undefined
    // elements because those can not be back-propagated for analysis.
    Value *OutputZeroVal = nullptr;
    if (match(TrueVal, m_AnyZeroFP()) && !match(FalseVal, m_AnyZeroFP()) &&
        !cast<Constant>(TrueVal)->containsUndefOrPoisonElement())
      OutputZeroVal = TrueVal;
    else if (match(FalseVal, m_AnyZeroFP()) && !match(TrueVal, m_AnyZeroFP()) &&
             !cast<Constant>(FalseVal)->containsUndefOrPoisonElement())
      OutputZeroVal = FalseVal;

    if (OutputZeroVal) {
      if (match(CmpLHS, m_AnyZeroFP()) && CmpLHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpLHS = OutputZeroVal;
      }
      if (match(CmpRHS, m_AnyZeroFP()) && CmpRHS != OutputZeroVal) {
        HasMismatchedZeros = true;
        CmpRHS = OutputZeroVal;
      }
    }
  }

  LHS = CmpLHS;
  RHS = CmpRHS;

  // Signed zero may return inconsistent results between implementations.
  //  (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
  //  minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
  // Therefore, we behave conservatively and only proceed if at least one of the
  // operands is known to not be zero or if we don't care about signed zero.
  switch (Pred) {
  default: break;
  case CmpInst::FCMP_OGT: case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_UGT: case CmpInst::FCMP_ULT:
    // Strict predicates are only signed-zero-sensitive if we substituted a
    // mismatched zero above.
    if (!HasMismatchedZeros)
      break;
    [[fallthrough]];
  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
        !isKnownNonZero(CmpRHS))
      return {SPF_UNKNOWN, SPNB_NA, false};
  }

  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
  bool Ordered = false;

  // When given one NaN and one non-NaN input:
  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the
  //     ordered comparison fails), which could be NaN or non-NaN.
  // so here we discover exactly what NaN behavior is required/accepted.
  if (CmpInst::isFPPredicate(Pred)) {
    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);

    if (LHSSafe && RHSSafe) {
      // Both operands are known non-NaN.
      NaNBehavior = SPNB_RETURNS_ANY;
      Ordered = CmpInst::isOrdered(Pred);
    } else if (CmpInst::isOrdered(Pred)) {
      // An ordered comparison will return false when given a NaN, so it
      // returns the RHS.
      Ordered = true;
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
        NaNBehavior = SPNB_RETURNS_NAN;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_OTHER;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    } else {
      Ordered = false;
      // An unordered comparison will return true when given a NaN, so it
      // returns the LHS.
      if (LHSSafe)
        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
        NaNBehavior = SPNB_RETURNS_OTHER;
      else if (RHSSafe)
        NaNBehavior = SPNB_RETURNS_NAN;
      else
        // Completely unsafe.
        return {SPF_UNKNOWN, SPNB_NA, false};
    }
  }

  // Canonicalize so the select arms line up as (cmp X, Y) ? X : Y; swapping
  // the compare flips which side a NaN would land on and the ordering.
  if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
    std::swap(CmpLHS, CmpRHS);
    Pred = CmpInst::getSwappedPredicate(Pred);
    if (NaNBehavior == SPNB_RETURNS_NAN)
      NaNBehavior = SPNB_RETURNS_OTHER;
    else if (NaNBehavior == SPNB_RETURNS_OTHER)
      NaNBehavior = SPNB_RETURNS_NAN;
    Ordered = !Ordered;
  }

  // ([if]cmp X, Y) ? X : Y
  if (TrueVal == CmpLHS && FalseVal == CmpRHS)
    return getSelectPattern(Pred, NaNBehavior, Ordered);

  if (isKnownNegation(TrueVal, FalseVal)) {
    // Sign-extending LHS does not change its sign, so TrueVal/FalseVal can
    // match against either LHS or sext(LHS).
    auto MaybeSExtCmpLHS =
        m_CombineOr(m_Specific(CmpLHS), m_SExt(m_Specific(CmpLHS)));
    auto ZeroOrAllOnes = m_CombineOr(m_ZeroInt(), m_AllOnes());
    auto ZeroOrOne = m_CombineOr(m_ZeroInt(), m_One());
    if (match(TrueVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = TrueVal;
      RHS = FalseVal;
      if (match(CmpLHS, m_Neg(m_Specific(FalseVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? X : -X or (X >s -1) ? X : -X --> ABS(X)
      // (-X >s 0) ? -X : X or (-X >s -1) ? -X : X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_ABS, SPNB_NA, false};

      // (X >=s 0) ? X : -X or (X >=s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SGE && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};

      // (X <s 0) ? X : -X or (X <s 1) ? X : -X --> NABS(X)
      // (-X <s 0) ? -X : X or (-X <s 1) ? -X : X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_NABS, SPNB_NA, false};
    }
    else if (match(FalseVal, MaybeSExtCmpLHS)) {
      // Set the return values. If the compare uses the negated value (-X >s 0),
      // swap the return values because the negated value is always 'RHS'.
      LHS = FalseVal;
      RHS = TrueVal;
      if (match(CmpLHS, m_Neg(m_Specific(TrueVal))))
        std::swap(LHS, RHS);

      // (X >s 0) ? -X : X or (X >s -1) ? -X : X --> NABS(X)
      // (-X >s 0) ? X : -X or (-X >s -1) ? X : -X --> NABS(X)
      if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, ZeroOrAllOnes))
        return {SPF_NABS, SPNB_NA, false};

      // (X <s 0) ? -X : X or (X <s 1) ? -X : X --> ABS(X)
      // (-X <s 0) ? X : -X or (-X <s 1) ? X : -X --> ABS(X)
      if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, ZeroOrOne))
        return {SPF_ABS, SPNB_NA, false};
    }
  }

  if (CmpInst::isIntPredicate(Pred))
    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS, Depth);

  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
  // semantics than minNum. Be conservative in such case.
  if (NaNBehavior != SPNB_RETURNS_ANY ||
      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
       !isKnownNonZero(CmpRHS)))
    return {SPF_UNKNOWN, SPNB_NA, false};

  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
8818
8819
// Given select-arm constant C seen through the cast *CastOp, try to compute
// the constant in the cast's source type SrcTy that round-trips exactly to C
// (so the cast can be hoisted past the select). Returns nullptr on failure.
static Value *lookThroughCastConst(CmpInst *CmpI, Type *SrcTy, Constant *C,
                                   Instruction::CastOps *CastOp) {
  const DataLayout &DL = CmpI->getDataLayout();

  Constant *CastedTo = nullptr;
  switch (*CastOp) {
  case Instruction::ZExt:
    // zext is only reversible when the compare treats the value as unsigned.
    if (CmpI->isUnsigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy);
    break;
  case Instruction::SExt:
    // sext is only reversible when the compare treats the value as signed.
    if (CmpI->isSigned())
      CastedTo = ConstantExpr::getTrunc(C, SrcTy, true);
    break;
  case Instruction::Trunc:
    Constant *CmpConst;
    if (match(CmpI->getOperand(1), m_Constant(CmpConst)) &&
        CmpConst->getType() == SrcTy) {
      // Here we have the following case:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %t, iK C
      //
      // We can always move trunc after select operation:
      //
      //   %cond = cmp iN %x, CmpConst
      //   %widesel = select i1 %cond, iN %x, iN CmpConst
      //   %tr = trunc iN %widesel to iK
      //
      // Note that C could be extended in any way because we don't care about
      // upper bits after truncation. It can't be abs pattern, because it would
      // look like:
      //
      //   select i1 %cond, x, -x.
      //
      // So only min/max pattern could be matched. Such match requires widened C
      // == CmpConst. That is why set widened C = CmpConst, condition trunc
      // CmpConst == C is checked below.
      CastedTo = CmpConst;
    } else {
      // Widen C with the signedness implied by the compare predicate.
      unsigned ExtOp = CmpI->isSigned() ? Instruction::SExt : Instruction::ZExt;
      CastedTo = ConstantFoldCastOperand(ExtOp, C, SrcTy, DL);
    }
    break;
  case Instruction::FPTrunc:
    CastedTo = ConstantFoldCastOperand(Instruction::FPExt, C, SrcTy, DL);
    break;
  case Instruction::FPExt:
    CastedTo = ConstantFoldCastOperand(Instruction::FPTrunc, C, SrcTy, DL);
    break;
  case Instruction::FPToUI:
    CastedTo = ConstantFoldCastOperand(Instruction::UIToFP, C, SrcTy, DL);
    break;
  case Instruction::FPToSI:
    CastedTo = ConstantFoldCastOperand(Instruction::SIToFP, C, SrcTy, DL);
    break;
  case Instruction::UIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToUI, C, SrcTy, DL);
    break;
  case Instruction::SIToFP:
    CastedTo = ConstantFoldCastOperand(Instruction::FPToSI, C, SrcTy, DL);
    break;
  default:
    break;
  }

  if (!CastedTo)
    return nullptr;

  // Make sure the cast doesn't lose any information.
  Constant *CastedBack =
      ConstantFoldCastOperand(*CastOp, CastedTo, C->getType(), DL);
  if (CastedBack && CastedBack != C)
    return nullptr;

  return CastedTo;
}
8897
8898
/// Helps to match a select pattern in case of a type mismatch.
///
/// The function processes the case when type of true and false values of a
/// select instruction differs from type of the cmp instruction operands because
/// of a cast instruction. The function checks if it is legal to move the cast
/// operation after "select". If yes, it returns the new second value of
/// "select" (with the assumption that cast is moved):
/// 1. As operand of cast instruction when both values of "select" are same cast
/// instructions.
/// 2. As restored constant (by applying reverse cast operation) when the first
/// value of the "select" is a cast operation and the second value is a
/// constant. It is implemented in lookThroughCastConst().
/// 3. As one operand is cast instruction and the other is not. The operands in
/// sel(cmp) are in different type integer.
/// NOTE: We return only the new second value because the first value could be
/// accessed as operand of cast instruction.
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
                              Instruction::CastOps *CastOp) {
  // Case requires the first select value to be a cast; otherwise there is
  // nothing to look through.
  auto *Cast1 = dyn_cast<CastInst>(V1);
  if (!Cast1)
    return nullptr;

  // Report the cast opcode to the caller so it can re-apply it after the
  // select has been rewritten.
  *CastOp = Cast1->getOpcode();
  Type *SrcTy = Cast1->getSrcTy();
  if (auto *Cast2 = dyn_cast<CastInst>(V2)) {
    // If V1 and V2 are both the same cast from the same type, look through V1.
    if (*CastOp == Cast2->getOpcode() && SrcTy == Cast2->getSrcTy())
      return Cast2->getOperand(0);
    // Different cast kinds (or source types) cannot be hoisted past the
    // select as a single cast.
    return nullptr;
  }

  // Case 2: cast-vs-constant is handled by the constant-folding helper.
  auto *C = dyn_cast<Constant>(V2);
  if (C)
    return lookThroughCastConst(CmpI, SrcTy, C, CastOp);

  Value *CastedTo = nullptr;
  // Case 3: only the trunc/ext pairing is supported when neither value is a
  // constant and only one is a cast.
  if (*CastOp == Instruction::Trunc) {
    if (match(CmpI->getOperand(1), m_ZExtOrSExt(m_Specific(V2)))) {
      // Here we have the following case:
      //   %y_ext = sext iK %y to iN
      //   %cond = cmp iN %x, %y_ext
      //   %tr = trunc iN %x to iK
      //   %narrowsel = select i1 %cond, iK %tr, iK %y
      //
      // We can always move trunc after select operation:
      //   %y_ext = sext iK %y to iN
      //   %cond = cmp iN %x, %y_ext
      //   %widesel = select i1 %cond, iN %x, iN %y_ext
      //   %tr = trunc iN %widesel to iK
      assert(V2->getType() == Cast1->getType() &&
             "V2 and Cast1 should be the same type.");
      // Return the widened value (the cmp operand), which becomes the new
      // second select operand in the wide type.
      CastedTo = CmpI->getOperand(1);
    }
  }

  return CastedTo;
}
8955
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
                                             Instruction::CastOps *CastOp,
                                             unsigned Depth) {
  // Shared "no match" result used by every early exit below.
  SelectPatternResult Unknown = {SPF_UNKNOWN, SPNB_NA, false};

  // Respect the global recursion budget.
  if (Depth >= MaxAnalysisRecursionDepth)
    return Unknown;

  // The pattern is only defined for a select whose condition is a compare.
  auto *Sel = dyn_cast<SelectInst>(V);
  if (!Sel)
    return Unknown;
  auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
  if (!Cmp)
    return Unknown;

  // Only FP selects carry fast-math flags; everything else starts clean.
  FastMathFlags FMF;
  if (isa<FPMathOperator>(Sel))
    FMF = Sel->getFastMathFlags();

  // Delegate to the decomposed form with the select's arms split out.
  return llvm::matchDecomposedSelectPattern(Cmp, Sel->getTrueValue(),
                                            Sel->getFalseValue(), LHS, RHS,
                                            FMF, CastOp, Depth);
}
8975
8976
// Match a min/max/abs/clamp-style pattern on an already-decomposed select:
// the compare and both select arms are passed explicitly. Handles type
// mismatches between the compare operands and the select arms by looking
// through casts (see lookThroughCast).
SelectPatternResult llvm::matchDecomposedSelectPattern(
    CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
    FastMathFlags FMF, Instruction::CastOps *CastOp, unsigned Depth) {
  CmpInst::Predicate Pred = CmpI->getPredicate();
  Value *CmpLHS = CmpI->getOperand(0);
  Value *CmpRHS = CmpI->getOperand(1);
  // An nnan flag on the compare strengthens the FMF we match with.
  if (isa<FPMathOperator>(CmpI) && CmpI->hasNoNaNs())
    FMF.setNoNaNs();

  // Bail out early.
  if (CmpI->isEquality())
    return {SPF_UNKNOWN, SPNB_NA, false};

  // Deal with type mismatches.
  if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
    // First try: TrueVal is the cast, FalseVal is its counterpart.
    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  cast<CastInst>(TrueVal)->getOperand(0), C,
                                  LHS, RHS, Depth);
    }
    // Second try: FalseVal is the cast, TrueVal is its counterpart.
    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) {
      // If this is a potential fmin/fmax with a cast to integer, then ignore
      // -0.0 because there is no corresponding integer value.
      if (*CastOp == Instruction::FPToSI || *CastOp == Instruction::FPToUI)
        FMF.setNoSignedZeros();
      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                  C, cast<CastInst>(FalseVal)->getOperand(0),
                                  LHS, RHS, Depth);
    }
  }
  // Types already agree (or no cast info requested): match directly.
  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                              LHS, RHS, Depth);
}
9013
9014
387k
// Map a min/max select-pattern flavor to the compare predicate that realizes
// it; for FP flavors, \p Ordered chooses the ordered vs. unordered predicate.
CmpInst::Predicate llvm::getMinMaxPred(SelectPatternFlavor SPF, bool Ordered) {
  switch (SPF) {
  case SPF_SMIN:
    return ICmpInst::ICMP_SLT;
  case SPF_UMIN:
    return ICmpInst::ICMP_ULT;
  case SPF_SMAX:
    return ICmpInst::ICMP_SGT;
  case SPF_UMAX:
    return ICmpInst::ICMP_UGT;
  case SPF_FMINNUM:
    return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT;
  case SPF_FMAXNUM:
    return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT;
  default:
    llvm_unreachable("unhandled!");
  }
}
9025
9026
229k
// Map an integer min/max select-pattern flavor to the equivalent intrinsic.
Intrinsic::ID llvm::getMinMaxIntrinsic(SelectPatternFlavor SPF) {
  if (SPF == SelectPatternFlavor::SPF_UMIN)
    return Intrinsic::umin;
  if (SPF == SelectPatternFlavor::SPF_UMAX)
    return Intrinsic::umax;
  if (SPF == SelectPatternFlavor::SPF_SMIN)
    return Intrinsic::smin;
  if (SPF == SelectPatternFlavor::SPF_SMAX)
    return Intrinsic::smax;
  // Only the four integer flavors are valid inputs.
  llvm_unreachable("Unexpected SPF");
}
9040
9041
162k
// Return the flavor obtained by swapping min<->max of the same signedness.
SelectPatternFlavor llvm::getInverseMinMaxFlavor(SelectPatternFlavor SPF) {
  switch (SPF) {
  case SPF_SMIN:
    return SPF_SMAX;
  case SPF_UMIN:
    return SPF_UMAX;
  case SPF_SMAX:
    return SPF_SMIN;
  case SPF_UMAX:
    return SPF_UMIN;
  default:
    llvm_unreachable("unhandled!");
  }
}
9048
9049
20.6M
// Return the min/max intrinsic obtained by swapping min<->max.
Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
  if (MinMaxID == Intrinsic::smax)
    return Intrinsic::smin;
  if (MinMaxID == Intrinsic::smin)
    return Intrinsic::smax;
  if (MinMaxID == Intrinsic::umax)
    return Intrinsic::umin;
  if (MinMaxID == Intrinsic::umin)
    return Intrinsic::umax;
  // Please note that next four intrinsics may produce the same result for
  // original and inverted case even if X != Y due to NaN is handled specially.
  if (MinMaxID == Intrinsic::maximum)
    return Intrinsic::minimum;
  if (MinMaxID == Intrinsic::minimum)
    return Intrinsic::maximum;
  if (MinMaxID == Intrinsic::maxnum)
    return Intrinsic::minnum;
  if (MinMaxID == Intrinsic::minnum)
    return Intrinsic::maxnum;
  llvm_unreachable("Unexpected intrinsic");
}
9064
9065
161k
// Return the identity/limit value of the given min/max flavor at the given
// bit width (e.g. SMAX's limit is the signed maximum).
APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
  if (SPF == SPF_SMAX)
    return APInt::getSignedMaxValue(BitWidth);
  if (SPF == SPF_SMIN)
    return APInt::getSignedMinValue(BitWidth);
  if (SPF == SPF_UMAX)
    return APInt::getMaxValue(BitWidth);
  if (SPF == SPF_UMIN)
    return APInt::getMinValue(BitWidth);
  llvm_unreachable("Unexpected flavor");
}
9074
9075
// Returns the min/max intrinsic all values in VL match (or not_intrinsic),
// plus a flag saying whether every matched compare had a single use.
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
  // Check if VL contains select instructions that can be folded into a min/max
  // vector intrinsic and return the intrinsic if it is possible.
  // TODO: Support floating point min/max.
  bool AllCmpSingleUse = true;
  SelectPatternResult SelectPattern;
  SelectPattern.Flavor = SPF_UNKNOWN;
  // Every element must be a min/max select, and all must share one flavor.
  if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
        Value *LHS, *RHS;
        auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
        if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor))
          return false;
        // A second, different flavor disqualifies the whole list.
        if (SelectPattern.Flavor != SPF_UNKNOWN &&
            SelectPattern.Flavor != CurrentPattern.Flavor)
          return false;
        SelectPattern = CurrentPattern;
        // Track whether the select's condition (the compare) is single-use.
        AllCmpSingleUse &=
            match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
        return true;
      })) {
    switch (SelectPattern.Flavor) {
    case SPF_SMIN:
      return {Intrinsic::smin, AllCmpSingleUse};
    case SPF_UMIN:
      return {Intrinsic::umin, AllCmpSingleUse};
    case SPF_SMAX:
      return {Intrinsic::smax, AllCmpSingleUse};
    case SPF_UMAX:
      return {Intrinsic::umax, AllCmpSingleUse};
    case SPF_FMAXNUM:
      return {Intrinsic::maxnum, AllCmpSingleUse};
    case SPF_FMINNUM:
      return {Intrinsic::minnum, AllCmpSingleUse};
    default:
      llvm_unreachable("unexpected select pattern flavor");
    }
  }
  return {Intrinsic::not_intrinsic, false};
}
9115
9116
// Match a two-predecessor recurrence PHI whose backedge value is an InstTy
// (binary operator or binary intrinsic) using the PHI as one operand. On
// success, Inst is the step instruction, Init the non-recurrent incoming
// value, and OtherOp the step operand that is not the PHI.
template <typename InstTy>
static bool matchTwoInputRecurrence(const PHINode *PN, InstTy *&Inst,
                                    Value *&Init, Value *&OtherOp) {
  // Handle the case of a simple two-predecessor recurrence PHI.
  // There's a lot more that could theoretically be done here, but
  // this is sufficient to catch some interesting cases.
  // TODO: Expand list -- gep, uadd.sat etc.
  if (PN->getNumIncomingValues() != 2)
    return false;

  // Try each incoming value as the recurrence step.
  for (unsigned I = 0; I != 2; ++I) {
    if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I))) {
      Value *LHS = Operation->getOperand(0);
      Value *RHS = Operation->getOperand(1);
      // The step must feed the PHI back into itself via one operand.
      if (LHS != PN && RHS != PN)
        continue;

      Inst = Operation;
      // The other incoming value (index !I) is the loop-entry initializer.
      Init = PN->getIncomingValue(!I);
      OtherOp = (LHS == PN) ? RHS : LHS;
      return true;
    }
  }
  return false;
}
ValueTracking.cpp:bool matchTwoInputRecurrence<llvm::BinaryOperator>(llvm::PHINode const*, llvm::BinaryOperator*&, llvm::Value*&, llvm::Value*&)
Line
Count
Source
9118
478M
                                    Value *&Init, Value *&OtherOp) {
9119
  // Handle the case of a simple two-predecessor recurrence PHI.
9120
  // There's a lot more that could theoretically be done here, but
9121
  // this is sufficient to catch some interesting cases.
9122
  // TODO: Expand list -- gep, uadd.sat etc.
9123
478M
  if (PN->getNumIncomingValues() != 2)
9124
56.7M
    return false;
9125
9126
1.06G
  
for (unsigned I = 0; 422M
I != 2;
++I641M
) {
9127
778M
    if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I))) {
9128
228M
      Value *LHS = Operation->getOperand(0);
9129
228M
      Value *RHS = Operation->getOperand(1);
9130
228M
      if (LHS != PN && 
RHS != PN109M
)
9131
90.6M
        continue;
9132
9133
137M
      Inst = Operation;
9134
137M
      Init = PN->getIncomingValue(!I);
9135
137M
      OtherOp = (LHS == PN) ? 
RHS118M
:
LHS18.8M
;
9136
137M
      return true;
9137
228M
    }
9138
778M
  }
9139
284M
  return false;
9140
422M
}
Unexecuted instantiation: ValueTracking.cpp:bool matchTwoInputRecurrence<llvm::IntrinsicInst>(llvm::PHINode const*, llvm::IntrinsicInst*&, llvm::Value*&, llvm::Value*&)
9141
9142
// Thin wrapper over matchTwoInputRecurrence specialized to BinaryOperator
// steps; see the comment below for the matched shape.
bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                 Value *&Start, Value *&Step) {
  // We try to match a recurrence of the form:
  //   %iv = [Start, %entry], [%iv.next, %backedge]
  //   %iv.next = binop %iv, Step
  // Or:
  //   %iv = [Start, %entry], [%iv.next, %backedge]
  //   %iv.next = binop Step, %iv
  return matchTwoInputRecurrence(P, BO, Start, Step);
}
9152
9153
bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P,
9154
50.5M
                                 Value *&Start, Value *&Step) {
9155
50.5M
  BinaryOperator *BO = nullptr;
9156
50.5M
  P = dyn_cast<PHINode>(I->getOperand(0));
9157
50.5M
  if (!P)
9158
47.4M
    P = dyn_cast<PHINode>(I->getOperand(1));
9159
50.5M
  return P && 
matchSimpleRecurrence(P, BO, Start, Step)3.96M
&&
BO == I1.32M
;
9160
50.5M
}
9161
9162
bool llvm::matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I,
9163
                                                PHINode *&P, Value *&Init,
9164
0
                                                Value *&OtherOp) {
9165
  // Binary intrinsics only supported for now.
9166
0
  if (I->arg_size() != 2 || I->getType() != I->getArgOperand(0)->getType() ||
9167
0
      I->getType() != I->getArgOperand(1)->getType())
9168
0
    return false;
9169
9170
0
  IntrinsicInst *II = nullptr;
9171
0
  P = dyn_cast<PHINode>(I->getArgOperand(0));
9172
0
  if (!P)
9173
0
    P = dyn_cast<PHINode>(I->getArgOperand(1));
9174
9175
0
  return P && matchTwoInputRecurrence(P, II, Init, OtherOp) && II == I;
9176
0
}
9177
9178
/// Return true if "icmp Pred LHS RHS" is always true.
/// Conservative: a false return means "unknown", not "always false".
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                            const Value *RHS) {
  // X <=/>=/== X is trivially true for non-strict predicates.
  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
    return true;

  // Beyond reflexivity, only the two "<=" forms are handled; other callers
  // canonicalize to these.
  switch (Pred) {
  default:
    return false;

  case CmpInst::ICMP_SLE: {
    const APInt *C;

    // LHS s<= LHS +_{nsw} C   if C >= 0
    // LHS s<= LHS | C         if C >= 0
    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) ||
        match(RHS, m_Or(m_Specific(LHS), m_APInt(C))))
      return !C->isNegative();

    // LHS s<= smax(LHS, V) for any V
    if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value())))
      return true;

    // smin(RHS, V) s<= RHS for any V
    if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->sle(*CRHS);

    return false;
  }

  case CmpInst::ICMP_ULE: {
    // LHS u<= LHS +_{nuw} V for any V
    if (match(RHS, m_c_Add(m_Specific(LHS), m_Value())) &&
        cast<OverflowingBinaryOperator>(RHS)->hasNoUnsignedWrap())
      return true;

    // LHS u<= LHS | V for any V
    if (match(RHS, m_c_Or(m_Specific(LHS), m_Value())))
      return true;

    // LHS u<= umax(LHS, V) for any V
    if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value())))
      return true;

    // RHS >> V u<= RHS for any V
    if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
      return true;

    // RHS u/ C_ugt_1 u<= RHS
    const APInt *C;
    if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1))
      return true;

    // RHS & V u<= RHS for any V
    if (match(LHS, m_c_And(m_Specific(RHS), m_Value())))
      return true;

    // umin(RHS, V) u<= RHS for any V
    if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value())))
      return true;

    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
    const Value *X;
    const APInt *CLHS, *CRHS;
    if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) &&
        match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS))))
      return CLHS->ule(*CRHS);

    return false;
  }
  }
}
9257
9258
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true.  Otherwise, return std::nullopt.
/// Each case below reduces the implication to two isTruePredicate queries
/// that sandwich the B operands by the A operands (or vice versa).
static std::optional<bool>
isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
                      const Value *ARHS, const Value *BLHS, const Value *BRHS) {
  switch (Pred) {
  default:
    // Only the ordered (non-equality) predicates are handled.
    return std::nullopt;

  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SLE:
    // BLHS s<= ALHS s<(=) ARHS s<= BRHS  ==>  BLHS s<(=) BRHS.
    if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_SGT:
  case CmpInst::ICMP_SGE:
    // ALHS s<= BLHS and BRHS s<= ARHS  ==>  BLHS s>(=) BRHS.
    if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    // Unsigned analogue of the SLT/SLE case.
    if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS))
      return true;
    return std::nullopt;

  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
    // Unsigned analogue of the SGT/SGE case.
    if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) &&
        isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS))
      return true;
    return std::nullopt;
  }
}
9296
9297
/// Return true if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is true.
/// Return false if "icmp LPred X, LCR" implies "icmp RPred X, RCR" is false.
/// Otherwise, return std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondCommonOperandWithCR(CmpPredicate LPred, const ConstantRange &LCR,
                                 CmpPredicate RPred, const ConstantRange &RCR) {
  // Given the range CR that X is known to lie in, decide whether
  // "icmp Pred X, RCR" is always true, always false, or unknown.
  auto CRImpliesPred = [&](ConstantRange CR,
                           CmpInst::Predicate Pred) -> std::optional<bool> {
    // If all true values for lhs and true for rhs, lhs implies rhs
    if (CR.icmp(Pred, RCR))
      return true;

    // If there is no overlap, lhs implies not rhs
    if (CR.icmp(CmpInst::getInversePredicate(Pred), RCR))
      return false;

    return std::nullopt;
  };
  // First attempt with the predicates as given.
  if (auto Res = CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
                               RPred))
    return Res;
  // If exactly one side carries the samesign flag, retry after normalizing:
  // flip the samesign side's signedness and drop the flag on the other.
  if (LPred.hasSameSign() ^ RPred.hasSameSign()) {
    LPred = LPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(LPred)
                                : LPred.dropSameSign();
    RPred = RPred.hasSameSign() ? ICmpInst::getFlippedSignednessPredicate(RPred)
                                : RPred.dropSameSign();
    return CRImpliesPred(ConstantRange::makeAllowedICmpRegion(LPred, LCR),
                         RPred);
  }
  return std::nullopt;
}
9328
9329
/// Return true if LHS implies RHS (expanded to its components as "R0 RPred R1")
/// is true.  Return false if LHS implies RHS is false. Otherwise, return
/// std::nullopt if we can't infer anything.
static std::optional<bool>
isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
                   CmpPredicate RPred, const Value *R0, const Value *R1,
                   const DataLayout &DL, bool LHSIsTrue) {
  // The rest of the logic assumes the LHS condition is true.  If that's not the
  // case, invert the predicate to make it so.
  if (!LHSIsTrue)
    LPred = ICmpInst::getInverseCmpPredicate(LPred);

  // We can have non-canonical operands, so try to normalize any common operand
  // to L0/R0.
  if (L0 == R1) {
    std::swap(R0, R1);
    RPred = ICmpInst::getSwappedCmpPredicate(RPred);
  }
  if (R0 == L1) {
    std::swap(L0, L1);
    LPred = ICmpInst::getSwappedCmpPredicate(LPred);
  }
  if (L1 == R1) {
    // If we have L0 == R0 and L1 == R1, then make L1/R1 the constants.
    if (L0 != R0 || match(L0, m_ImmConstant())) {
      std::swap(L0, L1);
      LPred = ICmpInst::getSwappedCmpPredicate(LPred);
      std::swap(R0, R1);
      RPred = ICmpInst::getSwappedCmpPredicate(RPred);
    }
  }

  // See if we can infer anything if operand-0 matches and we have at least one
  // constant.
  const APInt *Unused;
  if (L0 == R0 && (match(L1, m_APInt(Unused)) || match(R1, m_APInt(Unused)))) {
    // Potential TODO: We could also further use the constant range of L0/R0 to
    // further constraint the constant ranges. At the moment this leads to
    // several regressions related to not transforming `multi_use(A + C0) eq/ne
    // C1` (see discussion: D58633).
    ConstantRange LCR = computeConstantRange(
        L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    ConstantRange RCR = computeConstantRange(
        R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
        /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
    // Even if L1/R1 are not both constant, we can still sometimes deduce
    // relationship from a single constant. For example X u> Y implies X != 0.
    if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
      return R;
    // If both L1/R1 were exact constant ranges and we didn't get anything
    // here, we won't be able to deduce this.
    if (match(L1, m_APInt(Unused)) && match(R1, m_APInt(Unused)))
      return std::nullopt;
  }

  // Can we infer anything when the two compares have matching operands?
  if (L0 == R0 && L1 == R1)
    return ICmpInst::isImpliedByMatchingCmp(LPred, RPred);

  // It only really makes sense in the context of signed comparison for "X - Y
  // must be positive if X >= Y and no overflow".
  // Take SGT as an example:  L0:x > L1:y and C >= 0
  //                      ==> R0:(x -nsw y) < R1:(-C) is false
  CmpInst::Predicate SignedLPred = LPred.getPreferredSignedPredicate();
  if ((SignedLPred == ICmpInst::ICMP_SGT ||
       SignedLPred == ICmpInst::ICMP_SGE) &&
      match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
    if (match(R1, m_NonPositive()) &&
        ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == false)
      return false;
  }

  // Take SLT as an example:  L0:x < L1:y and C <= 0
  //                      ==> R0:(x -nsw y) < R1:(-C) is true
  if ((SignedLPred == ICmpInst::ICMP_SLT ||
       SignedLPred == ICmpInst::ICMP_SLE) &&
      match(R0, m_NSWSub(m_Specific(L0), m_Specific(L1)))) {
    if (match(R1, m_NonNegative()) &&
        ICmpInst::isImpliedByMatchingCmp(SignedLPred, RPred) == true)
      return true;
  }

  // L0 = R0 = L1 + R1, L0 >=u L1 implies R0 >=u R1, L0 <u L1 implies R0 <u R1
  if (L0 == R0 &&
      (LPred == ICmpInst::ICMP_ULT || LPred == ICmpInst::ICMP_UGE) &&
      (RPred == ICmpInst::ICMP_ULT || RPred == ICmpInst::ICMP_UGE) &&
      match(L0, m_c_Add(m_Specific(L1), m_Specific(R1))))
    return CmpPredicate::getMatching(LPred, RPred).has_value();

  // Final fallback: identical (matching) predicates let us compare the
  // operand pairs directly.
  if (auto P = CmpPredicate::getMatching(LPred, RPred))
    return isImpliedCondOperands(*P, L0, L1, R0, R1);

  return std::nullopt;
}
9424
9425
/// Return true if LHS implies RHS is true.  Return false if LHS implies RHS is
/// false.  Otherwise, return std::nullopt if we can't infer anything.  We
/// expect the RHS to be an icmp and the LHS to be an 'and', 'or', or a 'select'
/// instruction.
static std::optional<bool>
isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
                   const Value *RHSOp0, const Value *RHSOp1,
                   const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // The LHS must be an 'or', 'and', or a 'select' instruction.
  assert((LHS->getOpcode() == Instruction::And ||
          LHS->getOpcode() == Instruction::Or ||
          LHS->getOpcode() == Instruction::Select) &&
         "Expected LHS to be 'and', 'or', or 'select'.");

  assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");

  // If the result of an 'or' is false, then we know both legs of the 'or' are
  // false.  Similarly, if the result of an 'and' is true, then we know both
  // legs of the 'and' are true.
  const Value *ALHS, *ARHS;
  if ((!LHSIsTrue && match(LHS, m_LogicalOr(m_Value(ALHS), m_Value(ARHS)))) ||
      (LHSIsTrue && match(LHS, m_LogicalAnd(m_Value(ALHS), m_Value(ARHS))))) {
    // Either leg alone suffices to establish the implication.
    // FIXME: Make this non-recursion.
    if (std::optional<bool> Implication = isImpliedCondition(
            ALHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    if (std::optional<bool> Implication = isImpliedCondition(
            ARHS, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, Depth + 1))
      return Implication;
    return std::nullopt;
  }
  return std::nullopt;
}
9458
9459
// Overload taking the RHS already decomposed into (RHSPred, RHSOp0, RHSOp1).
// Returns true/false if LHS being LHSIsTrue decides the RHS compare, else
// std::nullopt.
std::optional<bool>
llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
                         const Value *RHSOp0, const Value *RHSOp1,
                         const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
  // Bail out when we hit the limit.
  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // A mismatch occurs when we compare a scalar cmp to a vector cmp, for
  // example.
  if (RHSOp0->getType()->isVectorTy() != LHS->getType()->isVectorTy())
    return std::nullopt;

  assert(LHS->getType()->isIntOrIntVectorTy(1) &&
         "Expected integer type only!");

  // Match not
  // Note: m_Not rebinds LHS to the negated operand on success.
  if (match(LHS, m_Not(m_Value(LHS))))
    LHSIsTrue = !LHSIsTrue;

  // Both LHS and RHS are icmps.
  if (const auto *LHSCmp = dyn_cast<ICmpInst>(LHS))
    return isImpliedCondICmps(LHSCmp->getCmpPredicate(), LHSCmp->getOperand(0),
                              LHSCmp->getOperand(1), RHSPred, RHSOp0, RHSOp1,
                              DL, LHSIsTrue);
  const Value *V;
  // A nuw trunc to i1 being true is equivalent to "V != 0".
  if (match(LHS, m_NUWTrunc(m_Value(V))))
    return isImpliedCondICmps(CmpInst::ICMP_NE, V,
                              ConstantInt::get(V->getType(), 0), RHSPred,
                              RHSOp0, RHSOp1, DL, LHSIsTrue);

  /// The LHS should be an 'or', 'and', or a 'select' instruction.  We expect
  /// the RHS to be an icmp.
  /// FIXME: Add support for and/or/select on the RHS.
  if (const Instruction *LHSI = dyn_cast<Instruction>(LHS)) {
    if ((LHSI->getOpcode() == Instruction::And ||
         LHSI->getOpcode() == Instruction::Or ||
         LHSI->getOpcode() == Instruction::Select))
      return isImpliedCondAndOr(LHSI, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue,
                                Depth);
  }
  return std::nullopt;
}
9502
9503
// General entry point: does LHS being LHSIsTrue decide the boolean value RHS?
// Returns true/false when decided, std::nullopt when unknown.
std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
                                             const DataLayout &DL,
                                             bool LHSIsTrue, unsigned Depth) {
  // LHS ==> RHS by definition
  if (LHS == RHS)
    return LHSIsTrue;

  // Match not
  // Note: m_Not rebinds RHS to the negated operand; InvertRHS flips every
  // result below.
  bool InvertRHS = false;
  if (match(RHS, m_Not(m_Value(RHS)))) {
    if (LHS == RHS)
      return !LHSIsTrue;
    InvertRHS = true;
  }

  // If the RHS is an icmp, delegate to the decomposed overload.
  if (const ICmpInst *RHSCmp = dyn_cast<ICmpInst>(RHS)) {
    if (auto Implied = isImpliedCondition(
            LHS, RHSCmp->getCmpPredicate(), RHSCmp->getOperand(0),
            RHSCmp->getOperand(1), DL, LHSIsTrue, Depth))
      return InvertRHS ? !*Implied : *Implied;
    return std::nullopt;
  }

  const Value *V;
  // A nuw trunc to i1 being true is equivalent to "V != 0".
  if (match(RHS, m_NUWTrunc(m_Value(V)))) {
    if (auto Implied = isImpliedCondition(LHS, CmpInst::ICMP_NE, V,
                                          ConstantInt::get(V->getType(), 0), DL,
                                          LHSIsTrue, Depth))
      return InvertRHS ? !*Implied : *Implied;
    return std::nullopt;
  }

  // Only recurse into logical and/or below within the budget.
  if (Depth == MaxAnalysisRecursionDepth)
    return std::nullopt;

  // LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
  // LHS ==> !(RHS1 && RHS2) if LHS ==> !RHS1 or LHS ==> !RHS2
  const Value *RHS1, *RHS2;
  if (match(RHS, m_LogicalOr(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == true)
        return !InvertRHS;
  }
  if (match(RHS, m_LogicalAnd(m_Value(RHS1), m_Value(RHS2)))) {
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS1, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
    if (std::optional<bool> Imp =
            isImpliedCondition(LHS, RHS2, DL, LHSIsTrue, Depth + 1))
      if (*Imp == false)
        return InvertRHS;
  }

  return std::nullopt;
}
9564
9565
// Returns a pair (Condition, ConditionIsTrue), where Condition is a branch
9566
// condition dominating ContextI or nullptr, if no condition is found.
9567
static std::pair<Value *, bool>
9568
1.26G
getDomPredecessorCondition(const Instruction *ContextI) {
9569
1.26G
  if (!ContextI || 
!ContextI->getParent()1.19G
)
9570
69.0M
    return {nullptr, false};
9571
9572
  // TODO: This is a poor/cheap way to determine dominance. Should we use a
9573
  // dominator tree (eg, from a SimplifyQuery) instead?
9574
1.19G
  const BasicBlock *ContextBB = ContextI->getParent();
9575
1.19G
  const BasicBlock *PredBB = ContextBB->getSinglePredecessor();
9576
1.19G
  if (!PredBB)
9577
579M
    return {nullptr, false};
9578
9579
  // We need a conditional branch in the predecessor.
9580
618M
  Value *PredCond;
9581
618M
  BasicBlock *TrueBB, *FalseBB;
9582
618M
  if (!match(PredBB->getTerminator(), m_Br(m_Value(PredCond), TrueBB, FalseBB)))
9583
101M
    return {nullptr, false};
9584
9585
  // The branch should get simplified. Don't bother simplifying this condition.
9586
517M
  if (TrueBB == FalseBB)
9587
0
    return {nullptr, false};
9588
9589
517M
  assert((TrueBB == ContextBB || FalseBB == ContextBB) &&
9590
517M
         "Predecessor block does not point to successor?");
9591
9592
  // Is this condition implied by the predecessor condition?
9593
517M
  return {PredCond, TrueBB == ContextBB};
9594
517M
}
9595
9596
std::optional<bool> llvm::isImpliedByDomCondition(const Value *Cond,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  assert(Cond->getType()->isIntOrIntVectorTy(1) && "Condition must be bool");
  // Look for a branch condition dominating ContextI; if none exists we know
  // nothing about Cond.
  auto [DomCond, CondIsTrue] = getDomPredecessorCondition(ContextI);
  if (!DomCond)
    return std::nullopt;
  return isImpliedCondition(DomCond, Cond, DL, CondIsTrue);
}
9605
9606
std::optional<bool> llvm::isImpliedByDomCondition(CmpPredicate Pred,
                                                  const Value *LHS,
                                                  const Value *RHS,
                                                  const Instruction *ContextI,
                                                  const DataLayout &DL) {
  // Same as the Value* overload, but for the decomposed comparison
  // "icmp Pred LHS, RHS".
  auto [DomCond, CondIsTrue] = getDomPredecessorCondition(ContextI);
  if (!DomCond)
    return std::nullopt;
  return isImpliedCondition(DomCond, Pred, LHS, RHS, DL, CondIsTrue);
}
9617
9618
/// Derive conservative bounds [Lower, Upper) for the result of binary
/// operator \p BO when one operand is a constant integer (splat for vectors).
/// On entry the caller has initialized \p Lower and \p Upper; when no bound
/// can be derived for an opcode/operand combination they are left unchanged.
/// The caller converts the pair via ConstantRange::getNonEmpty.
/// \p IIQ gates use of poison-generating flags (nuw/nsw/exact);
/// \p PreferSignedRange asks for the signed-friendly range when an operation
/// carries both nuw and nsw.
static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
                              APInt &Upper, const InstrInfoQuery &IIQ,
                              bool PreferSignedRange) {
  unsigned Width = Lower.getBitWidth();
  const APInt *C;
  switch (BO.getOpcode()) {
  case Instruction::Sub:
    // Only 'sub C, x' (constant LHS) is handled here.
    if (match(BO.getOperand(0), m_APInt(C))) {
      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);

      // If the caller expects a signed compare, then try to use a signed range.
      // Otherwise if both no-wraps are set, use the unsigned range because it
      // is never larger than the signed range. Example:
      // "sub nuw nsw i8 -2, x" is unsigned [0, 254] vs. signed [-128, 126].
      // "sub nuw nsw i8 2, x" is unsigned [0, 2] vs. signed [-125, 127].
      if (PreferSignedRange && HasNSW && HasNUW)
        HasNUW = false;

      if (HasNUW) {
        // 'sub nuw c, x' produces [0, C].
        Upper = *C + 1;
      } else if (HasNSW) {
        if (C->isNegative()) {
          // 'sub nsw -C, x' produces [SINT_MIN, -C - SINT_MIN].
          Lower = APInt::getSignedMinValue(Width);
          Upper = *C - APInt::getSignedMaxValue(Width);
        } else {
          // Note that sub 0, INT_MIN is not NSW. It techically is a signed wrap
          // 'sub nsw C, x' produces [C - SINT_MAX, SINT_MAX].
          Lower = *C - APInt::getSignedMaxValue(Width);
          Upper = APInt::getSignedMinValue(Width);
        }
      }
    }
    break;
  case Instruction::Add:
    // 'add x, C' with a nonzero constant RHS (add x, 0 gives no information).
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      bool HasNSW = IIQ.hasNoSignedWrap(&BO);
      bool HasNUW = IIQ.hasNoUnsignedWrap(&BO);

      // If the caller expects a signed compare, then try to use a signed
      // range. Otherwise if both no-wraps are set, use the unsigned range
      // because it is never larger than the signed range. Example: "add nuw
      // nsw i8 X, -2" is unsigned [254,255] vs. signed [-128, 125].
      if (PreferSignedRange && HasNSW && HasNUW)
        HasNUW = false;

      if (HasNUW) {
        // 'add nuw x, C' produces [C, UINT_MAX].
        Lower = *C;
      } else if (HasNSW) {
        if (C->isNegative()) {
          // 'add nsw x, -C' produces [SINT_MIN, SINT_MAX - C].
          Lower = APInt::getSignedMinValue(Width);
          Upper = APInt::getSignedMaxValue(Width) + *C + 1;
        } else {
          // 'add nsw x, +C' produces [SINT_MIN + C, SINT_MAX].
          Lower = APInt::getSignedMinValue(Width) + *C;
          Upper = APInt::getSignedMaxValue(Width) + 1;
        }
      }
    }
    break;

  case Instruction::And:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'and x, C' produces [0, C].
      Upper = *C + 1;
    // X & -X is a power of two or zero. So we can cap the value at max power of
    // two.
    if (match(BO.getOperand(0), m_Neg(m_Specific(BO.getOperand(1)))) ||
        match(BO.getOperand(1), m_Neg(m_Specific(BO.getOperand(0)))))
      Upper = APInt::getSignedMinValue(Width) + 1;
    break;

  case Instruction::Or:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'or x, C' produces [C, UINT_MAX].
      Lower = *C;
    break;

  case Instruction::AShr:
    // Constant shift amount must be in-range; an amount >= Width is poison.
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'ashr x, C' produces [INT_MIN >> C, INT_MAX >> C].
      Lower = APInt::getSignedMinValue(Width).ashr(*C);
      Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // Constant LHS: the result is between C and C >> (Width-1). With the
      // 'exact' flag the shift can remove at most countr_zero(C) bits.
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      if (C->isNegative()) {
        // 'ashr C, x' produces [C, C >> (Width-1)]
        Lower = *C;
        Upper = C->ashr(ShiftAmount) + 1;
      } else {
        // 'ashr C, x' produces [C >> (Width-1), C]
        Lower = C->ashr(ShiftAmount);
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::LShr:
    if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'lshr x, C' produces [0, UINT_MAX >> C].
      Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'lshr C, x' produces [C >> (Width-1), C].
      unsigned ShiftAmount = Width - 1;
      if (!C->isZero() && IIQ.isExact(&BO))
        ShiftAmount = C->countr_zero();
      Lower = C->lshr(ShiftAmount);
      Upper = *C + 1;
    }
    break;

  case Instruction::Shl:
    if (match(BO.getOperand(0), m_APInt(C))) {
      if (IIQ.hasNoUnsignedWrap(&BO)) {
        // 'shl nuw C, x' produces [C, C << CLZ(C)]
        Lower = *C;
        Upper = Lower.shl(Lower.countl_zero()) + 1;
      } else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
        if (C->isNegative()) {
          // 'shl nsw C, x' produces [C << CLO(C)-1, C]
          unsigned ShiftAmount = C->countl_one() - 1;
          Lower = C->shl(ShiftAmount);
          Upper = *C + 1;
        } else {
          // 'shl nsw C, x' produces [C, C << CLZ(C)-1]
          unsigned ShiftAmount = C->countl_zero() - 1;
          Lower = *C;
          Upper = C->shl(ShiftAmount) + 1;
        }
      } else {
        // If lowbit is set, value can never be zero.
        if ((*C)[0])
          Lower = APInt::getOneBitSet(Width, 0);
        // If we are shifting a constant the largest it can be is if the longest
        // sequence of consecutive ones is shifted to the highbits (breaking
        // ties for which sequence is higher). At the moment we take a liberal
        // upper bound on this by just popcounting the constant.
        // TODO: There may be a bitwise trick for it longest/highest
        // consecutative sequence of ones (naive method is O(Width) loop).
        Upper = APInt::getHighBitsSet(Width, C->popcount()) + 1;
      }
    } else if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
      // 'shl x, C': the low C bits of the result are known zero, so the value
      // is at most the all-ones pattern starting at bit C.
      Upper = APInt::getBitsSetFrom(Width, C->getZExtValue()) + 1;
    }
    break;

  case Instruction::SDiv:
    if (match(BO.getOperand(1), m_APInt(C))) {
      APInt IntMin = APInt::getSignedMinValue(Width);
      APInt IntMax = APInt::getSignedMaxValue(Width);
      if (C->isAllOnes()) {
        // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin + 1;
        Upper = IntMax + 1;
      } else if (C->countl_zero() < Width - 1) {
        // 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
        //    where C != -1 and C != 0 and C != 1
        Lower = IntMin.sdiv(*C);
        Upper = IntMax.sdiv(*C);
        if (Lower.sgt(Upper))
          std::swap(Lower, Upper);
        Upper = Upper + 1;
        assert(Upper != Lower && "Upper part of range has wrapped!");
      }
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isMinSignedValue()) {
        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
        Lower = *C;
        Upper = Lower.lshr(1) + 1;
      } else {
        // 'sdiv C, x' produces [-|C|, |C|].
        Upper = C->abs() + 1;
        Lower = (-Upper) + 1;
      }
    }
    break;

  case Instruction::UDiv:
    if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
      // 'udiv x, C' produces [0, UINT_MAX / C].
      Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      // 'udiv C, x' produces [0, C].
      Upper = *C + 1;
    }
    break;

  case Instruction::SRem:
    if (match(BO.getOperand(1), m_APInt(C))) {
      // 'srem x, C' produces (-|C|, |C|).
      Upper = C->abs();
      Lower = (-Upper) + 1;
    } else if (match(BO.getOperand(0), m_APInt(C))) {
      if (C->isNegative()) {
        // 'srem -|C|, x' produces [-|C|, 0].
        Upper = 1;
        Lower = *C;
      } else {
        // 'srem |C|, x' produces [0, |C|].
        Upper = *C + 1;
      }
    }
    break;

  case Instruction::URem:
    if (match(BO.getOperand(1), m_APInt(C)))
      // 'urem x, C' produces [0, C).
      Upper = *C;
    else if (match(BO.getOperand(0), m_APInt(C)))
      // 'urem C, x' produces [0, C].
      Upper = *C + 1;
    break;

  default:
    // Opcode not handled: leave Lower/Upper untouched.
    break;
  }
}
9842
9843
/// Compute a constant range for intrinsic call \p II based on its intrinsic ID
/// and any constant arguments; returns the full range when nothing is known.
/// \p UseInstrInfo controls whether the is-zero-poison argument of ctlz/cttz
/// may be used to tighten the bound (since relying on it assumes the operand
/// is never zero).
static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II,
                                          bool UseInstrInfo) {
  unsigned Width = II.getType()->getScalarSizeInBits();
  const APInt *C;
  switch (II.getIntrinsicID()) {
  case Intrinsic::ctlz:
  case Intrinsic::cttz: {
    APInt Upper(Width, Width);
    // If the is-zero-poison flag is set (arg1 == 1) and flags are trusted,
    // the result can never be exactly Width, so the half-open bound stays
    // [0, Width); otherwise widen it to include Width itself.
    if (!UseInstrInfo || !match(II.getArgOperand(1), m_One()))
      Upper += 1;
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width), Upper);
  }
  case Intrinsic::ctpop:
    // Maximum of set/clear bits is the bit width.
    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt(Width, Width) + 1);
  case Intrinsic::uadd_sat:
    // uadd.sat(x, C) produces [C, UINT_MAX].
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    break;
  case Intrinsic::sadd_sat:
    if (match(II.getOperand(0), m_APInt(C)) ||
        match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          APInt::getSignedMaxValue(Width) + *C +
                                              1);

      // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
                                        APInt::getSignedMaxValue(Width) + 1);
    }
    break;
  case Intrinsic::usub_sat:
    // usub.sat(C, x) produces [0, C].
    if (match(II.getOperand(0), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);

    // usub.sat(x, C) produces [0, UINT_MAX - C].
    if (match(II.getOperand(1), m_APInt(C)))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getMaxValue(Width) - *C + 1);
    break;
  case Intrinsic::ssub_sat:
    if (match(II.getOperand(0), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                          *C - APInt::getSignedMinValue(Width) +
                                              1);

      // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
      return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
                                        APInt::getSignedMaxValue(Width) + 1);
    } else if (match(II.getOperand(1), m_APInt(C))) {
      if (C->isNegative())
        // ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
        return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
                                          APInt::getSignedMaxValue(Width) + 1);

      // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        APInt::getSignedMaxValue(Width) - *C +
                                            1);
    }
    break;
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax:
    // Min/max only yields a bound when one operand is a constant: the
    // constant caps (min) or floors (max) the result.
    if (!match(II.getOperand(0), m_APInt(C)) &&
        !match(II.getOperand(1), m_APInt(C)))
      break;

    switch (II.getIntrinsicID()) {
    case Intrinsic::umin:
      return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
    case Intrinsic::umax:
      return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
    case Intrinsic::smin:
      return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
                                        *C + 1);
    case Intrinsic::smax:
      return ConstantRange::getNonEmpty(*C,
                                        APInt::getSignedMaxValue(Width) + 1);
    default:
      llvm_unreachable("Must be min/max intrinsic");
    }
    break;
  case Intrinsic::abs:
    // If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(II.getOperand(1), m_One()))
      return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                        APInt::getSignedMaxValue(Width) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(Width),
                                      APInt::getSignedMinValue(Width) + 1);
  case Intrinsic::vscale:
    // vscale's range comes from the enclosing function's vscale_range
    // attribute; bail if the call is not attached to a function.
    if (!II.getParent() || !II.getFunction())
      break;
    return getVScaleRange(II.getFunction(), Width);
  default:
    break;
  }

  // Nothing known for this intrinsic / operand combination.
  return ConstantRange::getFull(Width);
}
9955
9956
/// Compute a range for select \p SI by recognizing min/max/abs idioms via
/// matchSelectPattern. Returns the full range for selects that match no
/// known flavor or whose compared operand is not a constant.
static ConstantRange getRangeForSelectPattern(const SelectInst &SI,
                                              const InstrInfoQuery &IIQ) {
  unsigned BitWidth = SI.getType()->getScalarSizeInBits();
  const Value *LHS = nullptr, *RHS = nullptr;
  SelectPatternResult R = matchSelectPattern(&SI, LHS, RHS);
  if (R.Flavor == SPF_UNKNOWN)
    return ConstantRange::getFull(BitWidth);

  if (R.Flavor == SelectPatternFlavor::SPF_ABS) {
    // If the negation part of the abs (in RHS) has the NSW flag,
    // then the result of abs(X) is [0..SIGNED_MAX],
    // otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
    if (match(RHS, m_Neg(m_Specific(LHS))) &&
        IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
      return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                        APInt::getSignedMaxValue(BitWidth) + 1);

    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth),
                                      APInt::getSignedMinValue(BitWidth) + 1);
  }

  if (R.Flavor == SelectPatternFlavor::SPF_NABS) {
    // The result of -abs(X) is <= 0.
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      APInt(BitWidth, 1));
  }

  // For min/max flavors, one side must be a constant to bound the result.
  const APInt *C;
  if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C)))
    return ConstantRange::getFull(BitWidth);

  switch (R.Flavor) {
  case SPF_UMIN:
    // umin(x, C) is in [0, C].
    return ConstantRange::getNonEmpty(APInt::getZero(BitWidth), *C + 1);
  case SPF_UMAX:
    // umax(x, C) is in [C, UINT_MAX].
    return ConstantRange::getNonEmpty(*C, APInt::getZero(BitWidth));
  case SPF_SMIN:
    // smin(x, C) is in [SINT_MIN, C].
    return ConstantRange::getNonEmpty(APInt::getSignedMinValue(BitWidth),
                                      *C + 1);
  case SPF_SMAX:
    // smax(x, C) is in [C, SINT_MAX].
    return ConstantRange::getNonEmpty(*C,
                                      APInt::getSignedMaxValue(BitWidth) + 1);
  default:
    return ConstantRange::getFull(BitWidth);
  }
}
10002
10003
651k
static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
10004
  // The maximum representable value of a half is 65504. For floats the maximum
10005
  // value is 3.4e38 which requires roughly 129 bits.
10006
651k
  unsigned BitWidth = I->getType()->getScalarSizeInBits();
10007
651k
  if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
10008
651k
    return;
10009
0
  if (isa<FPToSIInst>(I) && BitWidth >= 17) {
10010
0
    Lower = APInt(BitWidth, -65504, true);
10011
0
    Upper = APInt(BitWidth, 65505);
10012
0
  }
10013
10014
0
  if (isa<FPToUIInst>(I) && BitWidth >= 16) {
10015
    // For a fptoui the lower limit is left as 0.
10016
0
    Upper = APInt(BitWidth, 65505);
10017
0
  }
10018
0
}
10019
10020
/// Compute a constant range for integer value \p V by combining:
///  - constant folding (Constant::toConstantRange),
///  - opcode-specific bounds (setLimitsForBinOp, getRangeForIntrinsic,
///    select patterns, fptosi/fptoui limits, argument range attributes),
///  - !range metadata and call-site range attributes, and
///  - dominating llvm.assume comparisons (when \p CtxI and \p AC are given).
/// Recursion is cut off at MaxAnalysisRecursionDepth with the full range.
ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
                                         bool UseInstrInfo, AssumptionCache *AC,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT,
                                         unsigned Depth) {
  assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");

  if (Depth == MaxAnalysisRecursionDepth)
    return ConstantRange::getFull(V->getType()->getScalarSizeInBits());

  if (auto *C = dyn_cast<Constant>(V))
    return C->toConstantRange();

  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  InstrInfoQuery IIQ(UseInstrInfo);
  ConstantRange CR = ConstantRange::getFull(BitWidth);
  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (auto *II = dyn_cast<IntrinsicInst>(V))
    CR = getRangeForIntrinsic(*II, UseInstrInfo);
  else if (auto *SI = dyn_cast<SelectInst>(V)) {
    // A select's range is the union of its arms, further narrowed by any
    // recognized min/max/abs pattern.
    ConstantRange CRTrue = computeConstantRange(
        SI->getTrueValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    ConstantRange CRFalse = computeConstantRange(
        SI->getFalseValue(), ForSigned, UseInstrInfo, AC, CtxI, DT, Depth + 1);
    CR = CRTrue.unionWith(CRFalse);
    CR = CR.intersectWith(getRangeForSelectPattern(*SI, IIQ));
  } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
    APInt Lower = APInt(BitWidth, 0);
    APInt Upper = APInt(BitWidth, 0);
    // TODO: Return ConstantRange.
    setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
    CR = ConstantRange::getNonEmpty(Lower, Upper);
  } else if (const auto *A = dyn_cast<Argument>(V))
    if (std::optional<ConstantRange> Range = A->getRange())
      CR = *Range;

  // Narrow further with !range metadata and call-site range attributes.
  if (auto *I = dyn_cast<Instruction>(V)) {
    if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
      CR = CR.intersectWith(getConstantRangeFromMetadata(*Range));

    if (const auto *CB = dyn_cast<CallBase>(V))
      if (std::optional<ConstantRange> Range = CB->getRange())
        CR = CR.intersectWith(*Range);
  }

  if (CtxI && AC) {
    // Try to restrict the range based on information from assumptions.
    for (auto &AssumeVH : AC->assumptionsFor(V)) {
      if (!AssumeVH)
        continue;
      CallInst *I = cast<CallInst>(AssumeVH);
      assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
             "Got assumption for the wrong function!");
      assert(I->getIntrinsicID() == Intrinsic::assume &&
             "must be an assume intrinsic");

      // The assume must dominate (be valid at) the context instruction.
      if (!isValidAssumeForContext(I, CtxI, DT))
        continue;
      Value *Arg = I->getArgOperand(0);
      ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
      // Currently we just use information from comparisons.
      if (!Cmp || Cmp->getOperand(0) != V)
        continue;
      // TODO: Set "ForSigned" parameter via Cmp->isSigned()?
      ConstantRange RHS =
          computeConstantRange(Cmp->getOperand(1), /* ForSigned */ false,
                               UseInstrInfo, AC, I, DT, Depth + 1);
      CR = CR.intersectWith(
          ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
    }
  }

  return CR;
}
10099
10100
static void
10101
addValueAffectedByCondition(Value *V,
10102
304M
                            function_ref<void(Value *)> InsertAffected) {
10103
304M
  assert(V != nullptr);
10104
304M
  if (isa<Argument>(V) || 
isa<GlobalValue>(V)291M
) {
10105
12.7M
    InsertAffected(V);
10106
291M
  } else if (auto *I = dyn_cast<Instruction>(V)) {
10107
252M
    InsertAffected(V);
10108
10109
    // Peek through unary operators to find the source of the condition.
10110
252M
    Value *Op;
10111
252M
    if (match(I, m_CombineOr(m_PtrToInt(m_Value(Op)), m_Trunc(m_Value(Op))))) {
10112
9.15M
      if (isa<Instruction>(Op) || 
isa<Argument>(Op)2.14M
)
10113
9.15M
        InsertAffected(Op);
10114
9.15M
    }
10115
252M
  }
10116
304M
}
10117
10118
/// Walk condition \p Cond and report, via \p InsertAffected, values whose
/// analysis results could be refined by knowing the condition holds.
/// \p IsAssume selects the llvm.assume mode: every visited value is reported
/// and comparison operands are added even without a constant RHS, while
/// logical and/or operands are NOT recursed into (assumes are split earlier).
void llvm::findValuesAffectedByCondition(
    Value *Cond, bool IsAssume, function_ref<void(Value *)> InsertAffected) {
  // Report V plus the source of a ptrtoint/trunc feeding it.
  auto AddAffected = [&InsertAffected](Value *V) {
    addValueAffectedByCondition(V, InsertAffected);
  };

  // For comparisons: assumes report both sides; branch conditions only
  // report the LHS when the RHS is a constant.
  auto AddCmpOperands = [&AddAffected, IsAssume](Value *LHS, Value *RHS) {
    if (IsAssume) {
      AddAffected(LHS);
      AddAffected(RHS);
    } else if (match(RHS, m_Constant()))
      AddAffected(LHS);
  };

  SmallVector<Value *, 8> Worklist;
  SmallPtrSet<Value *, 8> Visited;
  Worklist.push_back(Cond);
  while (!Worklist.empty()) {
    Value *V = Worklist.pop_back_val();
    if (!Visited.insert(V).second)
      continue;

    CmpPredicate Pred;
    Value *A, *B, *X;

    if (IsAssume) {
      AddAffected(V);
      if (match(V, m_Not(m_Value(X))))
        AddAffected(X);
    }

    if (match(V, m_LogicalOp(m_Value(A), m_Value(B)))) {
      // assume(A && B) is split to -> assume(A); assume(B);
      // assume(!(A || B)) is split to -> assume(!A); assume(!B);
      // Finally, assume(A || B) / assume(!(A && B)) generally don't provide
      // enough information to be worth handling (intersection of information as
      // opposed to union).
      if (!IsAssume) {
        Worklist.push_back(A);
        Worklist.push_back(B);
      }
    } else if (match(V, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
      bool HasRHSC = match(B, m_ConstantInt());
      if (ICmpInst::isEquality(Pred)) {
        AddAffected(A);
        if (IsAssume)
          AddAffected(B);
        if (HasRHSC) {
          Value *Y;
          // (X & C) or (X | C).
          // (X << C) or (X >>_s C) or (X >>_u C).
          if (match(A, m_Shift(m_Value(X), m_ConstantInt())))
            AddAffected(X);
          else if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                   match(A, m_Or(m_Value(X), m_Value(Y)))) {
            AddAffected(X);
            AddAffected(Y);
          }
        }
      } else {
        AddCmpOperands(A, B);
        if (HasRHSC) {
          // Handle (A + C1) u< C2, which is the canonical form of
          // A > C3 && A < C4.
          if (match(A, m_AddLike(m_Value(X), m_ConstantInt())))
            AddAffected(X);

          if (ICmpInst::isUnsigned(Pred)) {
            Value *Y;
            // X & Y u> C    -> X >u C && Y >u C
            // X | Y u< C    -> X u< C && Y u< C
            // X nuw+ Y u< C -> X u< C && Y u< C
            if (match(A, m_And(m_Value(X), m_Value(Y))) ||
                match(A, m_Or(m_Value(X), m_Value(Y))) ||
                match(A, m_NUWAdd(m_Value(X), m_Value(Y)))) {
              AddAffected(X);
              AddAffected(Y);
            }
            // X nuw- Y u> C -> X u> C
            if (match(A, m_NUWSub(m_Value(X), m_Value())))
              AddAffected(X);
          }
        }

        // Handle icmp slt/sgt (bitcast X to int), 0/-1, which is supported
        // by computeKnownFPClass().
        if (match(A, m_ElementWiseBitCast(m_Value(X)))) {
          if (Pred == ICmpInst::ICMP_SLT && match(B, m_Zero()))
            InsertAffected(X);
          else if (Pred == ICmpInst::ICMP_SGT && match(B, m_AllOnes()))
            InsertAffected(X);
        }
      }

      // ctpop(X) compared against a constant bounds X's popcount.
      if (HasRHSC && match(A, m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))
        AddAffected(X);
    } else if (match(V, m_FCmp(Pred, m_Value(A), m_Value(B)))) {
      AddCmpOperands(A, B);

      // fcmp fneg(x), y
      // fcmp fabs(x), y
      // fcmp fneg(fabs(x)), y
      if (match(A, m_FNeg(m_Value(A))))
        AddAffected(A);
      if (match(A, m_FAbs(m_Value(A))))
        AddAffected(A);

    } else if (match(V, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
                                                           m_Value()))) {
      // Handle patterns that computeKnownFPClass() support.
      AddAffected(A);
    } else if (!IsAssume && match(V, m_Trunc(m_Value(X)))) {
      // Assume is checked here as X is already added above for assumes in
      // addValueAffectedByCondition
      AddAffected(X);
    } else if (!IsAssume && match(V, m_Not(m_Value(X)))) {
      // Assume is checked here to avoid issues with ephemeral values
      Worklist.push_back(X);
    }
  }
}
10239
10240
589M
/// Look through the combined "shift plus sticky low bits" idiom and return
/// the underlying value X, or nullptr if \p V does not match. The pattern is
///   (X >> C1) add/or zext((X & mask(C1)) != 0)
/// where mask(C1) is a low-bit mask whose popcount equals C1 — i.e. the mask
/// covers exactly the bits shifted out, so the result is zero iff X is zero.
const Value *llvm::stripNullTest(const Value *V) {
  // (X >> C) or/add (X & mask(C) != 0)
  if (const auto *BO = dyn_cast<BinaryOperator>(V)) {
    if (BO->getOpcode() == Instruction::Add ||
        BO->getOpcode() == Instruction::Or) {
      const Value *X;
      const APInt *C1, *C2;
      // m_c_BinOp handles either operand order of the add/or.
      if (match(BO, m_c_BinOp(m_LShr(m_Value(X), m_APInt(C1)),
                              m_ZExt(m_SpecificICmp(
                                  ICmpInst::ICMP_NE,
                                  m_And(m_Deferred(X), m_LowBitMask(C2)),
                                  m_Zero())))) &&
          // The low-bit mask must span exactly the C1 shifted-out bits.
          C2->popcount() == C1->getZExtValue())
        return X;
    }
  }
  return nullptr;
}
10258
10259
92.0M
Value *llvm::stripNullTest(Value *V) {
  // Delegate to the const overload, then cast the constness back off.
  const Value *Stripped = stripNullTest(static_cast<const Value *>(V));
  return const_cast<Value *>(Stripped);
}