Taewook Oh | 2da205d | 2019-12-02 18:15:22 | [diff] [blame] | 1 | diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h |
| 2 | index c8965936fb9..41d6c23b8d0 100644 |
| 3 | --- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h |
| 4 | +++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h |
| 5 | @@ -34,6 +34,7 @@ namespace llvm { |
| 6 | class Function; |
| 7 | class LoopInfo; |
| 8 | class raw_ostream; |
| 9 | +class PostDominatorTree; |
| 10 | class TargetLibraryInfo; |
| 11 | class Value; |
| 12 | |
| 13 | @@ -187,8 +188,10 @@ private: |
| 14 | /// Track the set of blocks that always lead to a cold call. |
| 15 | SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall; |
| 16 | |
| 17 | - void updatePostDominatedByUnreachable(const BasicBlock *BB); |
| 18 | - void updatePostDominatedByColdCall(const BasicBlock *BB); |
| 19 | + void computePostDominatedByUnreachable(const Function &F, |
| 20 | + PostDominatorTree *PDT); |
| 21 | + void computePostDominatedByColdCall(const Function &F, |
| 22 | + PostDominatorTree *PDT); |
| 23 | bool calcUnreachableHeuristics(const BasicBlock *BB); |
| 24 | bool calcMetadataWeights(const BasicBlock *BB); |
| 25 | bool calcColdCallHeuristics(const BasicBlock *BB); |
| 26 | diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp |
| 27 | index 7bd237b9ad5..ffba65b5ed5 100644 |
| 28 | --- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp |
| 29 | +++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp |
| 30 | @@ -16,6 +16,7 @@ |
| 31 | #include "llvm/ADT/STLExtras.h" |
| 32 | #include "llvm/ADT/SmallVector.h" |
| 33 | #include "llvm/Analysis/LoopInfo.h" |
| 34 | +#include "llvm/Analysis/PostDominators.h" |
| 35 | #include "llvm/Analysis/TargetLibraryInfo.h" |
| 36 | #include "llvm/IR/Attributes.h" |
| 37 | #include "llvm/IR/BasicBlock.h" |
| 38 | @@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; |
| 39 | /// instruction. This is essentially never taken. |
| 40 | static const uint32_t IH_NONTAKEN_WEIGHT = 1; |
| 41 | |
| 42 | -/// Add \p BB to PostDominatedByUnreachable set if applicable. |
| 43 | -void |
| 44 | -BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) { |
| 45 | - const Instruction *TI = BB->getTerminator(); |
| 46 | - if (TI->getNumSuccessors() == 0) { |
| 47 | - if (isa<UnreachableInst>(TI) || |
| 48 | - // If this block is terminated by a call to |
| 49 | - // @llvm.experimental.deoptimize then treat it like an unreachable since |
| 50 | - // the @llvm.experimental.deoptimize call is expected to practically |
| 51 | - // never execute. |
| 52 | - BB->getTerminatingDeoptimizeCall()) |
| 53 | - PostDominatedByUnreachable.insert(BB); |
| 54 | - return; |
| 55 | - } |
| 56 | +static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT, |
| 57 | + SmallVectorImpl<const BasicBlock *> &WorkList, |
| 58 | + SmallPtrSetImpl<const BasicBlock *> &TargetSet) { |
| 59 | + SmallVector<BasicBlock *, 8> Descendants; |
| 60 | + SmallPtrSet<const BasicBlock *, 16> NewItems; |
| 61 | + |
| 62 | + PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants); |
| 63 | + for (auto *BB : Descendants) |
| 64 | + if (TargetSet.insert(BB).second) |
| 65 | + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) |
| 66 | + if (!TargetSet.count(*PI)) |
| 67 | + NewItems.insert(*PI); |
| 68 | + WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end()); |
| 69 | +} |
| 70 | |
| 71 | - // If the terminator is an InvokeInst, check only the normal destination block |
| 72 | - // as the unwind edge of InvokeInst is also very unlikely taken. |
| 73 | - if (auto *II = dyn_cast<InvokeInst>(TI)) { |
| 74 | - if (PostDominatedByUnreachable.count(II->getNormalDest())) |
| 75 | - PostDominatedByUnreachable.insert(BB); |
| 76 | - return; |
| 77 | +/// Compute a set of basic blocks that are post-dominated by unreachables. |
| 78 | +void BranchProbabilityInfo::computePostDominatedByUnreachable( |
| 79 | + const Function &F, PostDominatorTree *PDT) { |
| 80 | + SmallVector<const BasicBlock *, 8> WorkList; |
| 81 | + for (auto &BB : F) { |
| 82 | + const Instruction *TI = BB.getTerminator(); |
| 83 | + if (TI->getNumSuccessors() == 0) { |
| 84 | + if (isa<UnreachableInst>(TI) || |
| 85 | + // If this block is terminated by a call to |
| 86 | + // @llvm.experimental.deoptimize then treat it like an unreachable |
| 87 | + // since the @llvm.experimental.deoptimize call is expected to |
| 88 | + // practically never execute. |
| 89 | + BB.getTerminatingDeoptimizeCall()) |
| 90 | + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable); |
| 91 | + } |
| 92 | } |
| 93 | |
| 94 | - for (auto *I : successors(BB)) |
| 95 | - // If any of successor is not post dominated then BB is also not. |
| 96 | - if (!PostDominatedByUnreachable.count(I)) |
| 97 | - return; |
| 98 | - |
| 99 | - PostDominatedByUnreachable.insert(BB); |
| 100 | + while (!WorkList.empty()) { |
| 101 | + const BasicBlock *BB = WorkList.pop_back_val(); |
| 102 | + if (PostDominatedByUnreachable.count(BB)) |
| 103 | + continue; |
| 104 | + // If the terminator is an InvokeInst, check only the normal destination |
| 105 | + // block as the unwind edge of InvokeInst is very unlikely to be taken. |
| 106 | + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { |
| 107 | + if (PostDominatedByUnreachable.count(II->getNormalDest())) |
| 108 | + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); |
| 109 | + } |
| 110 | + // If all the successors are post-dominated by unreachable, so is BB. |
| 111 | + else if (!successors(BB).empty() && |
| 112 | + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { |
| 113 | + return PostDominatedByUnreachable.count(Succ); |
| 114 | + })) |
| 115 | + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable); |
| 116 | + } |
| 117 | } |
| 118 | |
| 119 | -/// Add \p BB to PostDominatedByColdCall set if applicable. |
| 120 | -void |
| 121 | -BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) { |
| 122 | - assert(!PostDominatedByColdCall.count(BB)); |
| 123 | - const Instruction *TI = BB->getTerminator(); |
| 124 | - if (TI->getNumSuccessors() == 0) |
| 125 | - return; |
| 126 | +/// Compute a set of basic blocks that are post-dominated by cold calls. |
| 127 | +void BranchProbabilityInfo::computePostDominatedByColdCall( |
| 128 | + const Function &F, PostDominatorTree *PDT) { |
| 129 | + SmallVector<const BasicBlock *, 8> WorkList; |
| 130 | + for (auto &BB : F) |
| 131 | + for (auto &I : BB) |
| 132 | + if (const CallInst *CI = dyn_cast<CallInst>(&I)) |
| 133 | + if (CI->hasFnAttr(Attribute::Cold)) |
| 134 | + UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall); |
| 135 | |
| 136 | - // If all of successor are post dominated then BB is also done. |
| 137 | - if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) { |
| 138 | - return PostDominatedByColdCall.count(SuccBB); |
| 139 | - })) { |
| 140 | - PostDominatedByColdCall.insert(BB); |
| 141 | - return; |
| 142 | - } |
| 143 | + while (!WorkList.empty()) { |
| 144 | + const BasicBlock *BB = WorkList.pop_back_val(); |
| 145 | |
| 146 | - // If the terminator is an InvokeInst, check only the normal destination |
| 147 | - // block as the unwind edge of InvokeInst is also very unlikely taken. |
| 148 | - if (auto *II = dyn_cast<InvokeInst>(TI)) |
| 149 | - if (PostDominatedByColdCall.count(II->getNormalDest())) { |
| 150 | - PostDominatedByColdCall.insert(BB); |
| 151 | - return; |
| 152 | + // If the terminator is an InvokeInst, check only the normal destination |
| 153 | + // block as the unwind edge of InvokeInst is very unlikely to be taken. |
| 154 | + if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) { |
| 155 | + if (PostDominatedByColdCall.count(II->getNormalDest())) |
| 156 | + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); |
| 157 | } |
| 158 | - |
| 159 | - // Otherwise, if the block itself contains a cold function, add it to the |
| 160 | - // set of blocks post-dominated by a cold call. |
| 161 | - for (auto &I : *BB) |
| 162 | - if (const CallInst *CI = dyn_cast<CallInst>(&I)) |
| 163 | - if (CI->hasFnAttr(Attribute::Cold)) { |
| 164 | - PostDominatedByColdCall.insert(BB); |
| 165 | - return; |
| 166 | - } |
| 167 | + // If all successors are post-dominated by a cold call, so is BB. |
| 168 | + else if (!successors(BB).empty() && |
| 169 | + llvm::all_of(successors(BB), [this](const BasicBlock *Succ) { |
| 170 | + return PostDominatedByColdCall.count(Succ); |
| 171 | + })) |
| 172 | + UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall); |
| 173 | + } |
| 174 | } |
| 175 | |
| 176 | /// Calculate edge weights for successors lead to unreachable. |
| 177 | @@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI, |
| 178 | LLVM_DEBUG(dbgs() << "\n"); |
| 179 | } |
| 180 | |
| 181 | + std::unique_ptr<PostDominatorTree> PDT = |
| 182 | + std::make_unique<PostDominatorTree>(const_cast<Function &>(F)); |
| 183 | + computePostDominatedByUnreachable(F, PDT.get()); |
| 184 | + computePostDominatedByColdCall(F, PDT.get()); |
| 185 | + |
| 186 | // Walk the basic blocks in post-order so that we can build up state about |
| 187 | // the successors of a block iteratively. |
| 188 | for (auto BB : post_order(&F.getEntryBlock())) { |
| 189 | LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName() |
| 190 | << "\n"); |
| 191 | - updatePostDominatedByUnreachable(BB); |
| 192 | - updatePostDominatedByColdCall(BB); |
| 193 | // If there is no at least two successors, no sense to set probability. |
| 194 | if (BB->getTerminator()->getNumSuccessors() < 2) |
| 195 | continue; |
| 196 | diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll |
| 197 | index 64e0a82456f..8212cc47690 100644 |
| 198 | --- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll |
| 199 | +++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll |
| 200 | @@ -141,6 +141,24 @@ exit: |
| 201 | ret i32 %result |
| 202 | } |
| 203 | |
| 204 | +define i32 @test_cold_loop(i32 %a, i32 %b) { |
| 205 | +entry: |
| 206 | + %cond1 = icmp eq i32 %a, 42 |
| 207 | + br i1 %cond1, label %header, label %exit |
| 208 | + |
| 209 | +header: |
| 210 | + br label %body |
| 211 | + |
| 212 | +body: |
| 213 | + %cond2 = icmp eq i32 %b, 42 |
| 214 | + br i1 %cond2, label %header, label %exit |
| 215 | +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% |
| 216 | + |
| 217 | +exit: |
| 218 | + call void @coldfunc() |
| 219 | + ret i32 %b |
| 220 | +} |
| 221 | + |
| 222 | declare i32 @regular_function(i32 %i) |
| 223 | |
| 224 | define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) { |
| 225 | diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll |
| 226 | index 0566ca16c2f..6e01afd2cfc 100644 |
| 227 | --- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll |
| 228 | +++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll |
| 229 | @@ -79,6 +79,32 @@ exit: |
| 230 | ret i32 %b |
| 231 | } |
| 232 | |
| 233 | +define i32 @test4(i32 %a, i32 %b) { |
| 234 | +; CHECK: Printing analysis {{.*}} for function 'test4' |
| 235 | +; Make sure we handle loops post-dominated by unreachables. |
| 236 | +entry: |
| 237 | + %cond1 = icmp eq i32 %a, 42 |
| 238 | + br i1 %cond1, label %header, label %exit |
| 239 | +; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00% |
| 240 | +; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge] |
| 241 | + |
| 242 | +header: |
| 243 | + br label %body |
| 244 | + |
| 245 | +body: |
| 246 | + %cond2 = icmp eq i32 %a, 42 |
| 247 | + br i1 %cond2, label %header, label %abort |
| 248 | +; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00% |
| 249 | +; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00% |
| 250 | + |
| 251 | +abort: |
| 252 | + call void @abort() noreturn |
| 253 | + unreachable |
| 254 | + |
| 255 | +exit: |
| 256 | + ret i32 %b |
| 257 | +} |
| 258 | + |
| 259 | @_ZTIi = external global i8* |
| 260 | |
| 261 | ; CHECK-LABEL: throwSmallException |
| 262 | diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll |
| 263 | index 883d26b6690..a859121bb50 100644 |
| 264 | --- a/llvm/test/CodeGen/PowerPC/pr36292.ll |
| 265 | +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll |
| 266 | @@ -15,8 +15,7 @@ define void @test() nounwind comdat { |
| 267 | ; CHECK-NEXT: ld 29, 0(3) |
| 268 | ; CHECK-NEXT: ld 30, 32(1) |
| 269 | ; CHECK-NEXT: cmpld 30, 29 |
| 270 | -; CHECK-NEXT: bge- 0, .LBB0_2 |
| 271 | -; CHECK-NEXT: .p2align 5 |
| 272 | +; CHECK-NEXT: bge 0, .LBB0_2 |
| 273 | ; CHECK-NEXT: .LBB0_1: # %bounds.ok |
| 274 | ; CHECK-NEXT: # |
| 275 | ; CHECK-NEXT: lfsx 2, 0, 3 |
| 276 | @@ -26,7 +25,7 @@ define void @test() nounwind comdat { |
| 277 | ; CHECK-NEXT: addi 30, 30, 1 |
| 278 | ; CHECK-NEXT: stfsx 1, 0, 3 |
| 279 | ; CHECK-NEXT: cmpld 30, 29 |
| 280 | -; CHECK-NEXT: blt+ 0, .LBB0_1 |
| 281 | +; CHECK-NEXT: blt 0, .LBB0_1 |
| 282 | ; CHECK-NEXT: .LBB0_2: # %bounds.fail |
| 283 | ; CHECK-NEXT: std 30, 32(1) |
| 284 | %pos = alloca i64, align 8 |
| 285 | diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll |
| 286 | index 8fdcd1eac45..7804b0a3f09 100644 |
| 287 | --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll |
| 288 | +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll |
| 289 | @@ -44,7 +44,6 @@ define void @print_res() nounwind { |
| 290 | ; CHECK-NEXT: lbz 5, 0(5) |
| 291 | ; CHECK-NEXT: addi 3, 3, 1 |
| 292 | ; CHECK-NEXT: bdz .LBB0_4 |
| 293 | -; CHECK-NEXT: .p2align 4 |
| 294 | ; CHECK-NEXT: .LBB0_3: # |
| 295 | ; CHECK-NEXT: clrldi 10, 8, 32 |
| 296 | ; CHECK-NEXT: cntlzw 9, 6 |
| 297 | diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll |
| 298 | index acc4b7e1381..258cc2031ae 100644 |
| 299 | --- a/llvm/test/CodeGen/X86/block-placement.ll |
| 300 | +++ b/llvm/test/CodeGen/X86/block-placement.ll |
| 301 | @@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) { |
| 302 | ; CHECK: %loop.header |
| 303 | ; CHECK: %loop.body1 |
| 304 | ; CHECK: %loop.body2 |
| 305 | +; CHECK: %loop.body3 |
| 306 | +; CHECK: %loop.inner1.begin |
| 307 | ; CHECK: %loop.body4 |
| 308 | ; CHECK: %loop.inner2.begin |
| 309 | ; CHECK: %loop.inner2.begin |
| 310 | -; CHECK: %loop.body3 |
| 311 | -; CHECK: %loop.inner1.begin |
| 312 | ; CHECK: %bail |
| 313 | |
| 314 | entry: |
| 315 | diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll |
| 316 | index 2da9413a9a0..484104da9ff 100644 |
| 317 | --- a/llvm/test/CodeGen/X86/pr37916.ll |
| 318 | +++ b/llvm/test/CodeGen/X86/pr37916.ll |
| 319 | @@ -7,7 +7,6 @@ |
| 320 | define void @fn1() local_unnamed_addr { |
| 321 | ; CHECK-LABEL: fn1: |
| 322 | ; CHECK: # %bb.0: # %entry |
| 323 | -; CHECK-NEXT: .p2align 4, 0x90 |
| 324 | ; CHECK-NEXT: .LBB0_1: # %if.end |
| 325 | ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 |
| 326 | ; CHECK-NEXT: movl a+4, %eax |
| 327 | diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |
| 328 | index 9238ab0bf89..92708d33924 100644 |
| 329 | --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |
| 330 | +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll |
| 331 | @@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 332 | ; CHECK-NEXT: .cfi_def_cfa_offset 48 |
| 333 | ; CHECK-NEXT: pushq %rbx |
| 334 | ; CHECK-NEXT: .cfi_def_cfa_offset 56 |
| 335 | -; CHECK-NEXT: subq $536, %rsp ## imm = 0x218 |
| 336 | -; CHECK-NEXT: .cfi_def_cfa_offset 592 |
| 337 | +; CHECK-NEXT: subq $552, %rsp ## imm = 0x228 |
| 338 | +; CHECK-NEXT: .cfi_def_cfa_offset 608 |
| 339 | ; CHECK-NEXT: .cfi_offset %rbx, -56 |
| 340 | ; CHECK-NEXT: .cfi_offset %r12, -48 |
| 341 | ; CHECK-NEXT: .cfi_offset %r13, -40 |
| 342 | @@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 343 | ; CHECK-NEXT: testb %al, %al |
| 344 | ; CHECK-NEXT: je LBB0_55 |
| 345 | ; CHECK-NEXT: LBB0_4: ## %cleanup |
| 346 | -; CHECK-NEXT: addq $536, %rsp ## imm = 0x218 |
| 347 | +; CHECK-NEXT: addq $552, %rsp ## imm = 0x228 |
| 348 | ; CHECK-NEXT: popq %rbx |
| 349 | ; CHECK-NEXT: popq %r12 |
| 350 | ; CHECK-NEXT: popq %r13 |
| 351 | @@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 352 | ; CHECK-NEXT: je LBB0_55 |
| 353 | ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 |
| 354 | ; CHECK-NEXT: movq %rdx, %rbx |
| 355 | -; CHECK-NEXT: movq %rdi, %rbp |
| 356 | +; CHECK-NEXT: movq %rdi, %r14 |
| 357 | ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax |
| 358 | ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx |
| 359 | ; CHECK-NEXT: cmpq %rax, %rcx |
| 360 | @@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 361 | ; CHECK-NEXT: movl $32, %esi |
| 362 | ; CHECK-NEXT: callq _memset |
| 363 | ; CHECK-NEXT: LBB0_8: ## %while.body.preheader |
| 364 | -; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill |
| 365 | ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410 |
| 366 | ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx |
| 367 | -; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx |
| 368 | +; CHECK-NEXT: leaq 8(%rcx,%rax), %rax |
| 369 | +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill |
| 370 | ; CHECK-NEXT: movl $1, %r15d |
| 371 | ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax |
| 372 | ; CHECK-NEXT: movb $1, %cl |
| 373 | @@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 374 | ; CHECK-NEXT: testb %cl, %cl |
| 375 | ; CHECK-NEXT: jne LBB0_9 |
| 376 | ; CHECK-NEXT: ## %bb.10: ## %do.end |
| 377 | -; CHECK-NEXT: xorl %r14d, %r14d |
| 378 | -; CHECK-NEXT: testb %r14b, %r14b |
| 379 | +; CHECK-NEXT: xorl %ebp, %ebp |
| 380 | +; CHECK-NEXT: testb %bpl, %bpl |
| 381 | ; CHECK-NEXT: jne LBB0_11 |
| 382 | ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader |
| 383 | -; CHECK-NEXT: xorl %edx, %edx |
| 384 | -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi |
| 385 | -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi |
| 386 | -; CHECK-NEXT: xorl %ebp, %ebp |
| 387 | -; CHECK-NEXT: xorl %r13d, %r13d |
| 388 | +; CHECK-NEXT: xorl %ebx, %ebx |
| 389 | +; CHECK-NEXT: leaq {{.*}}(%rip), %r13 |
| 390 | +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill |
| 391 | +; CHECK-NEXT: xorl %r12d, %r12d |
| 392 | +; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill |
| 393 | ; CHECK-NEXT: jmp LBB0_13 |
| 394 | ; CHECK-NEXT: .p2align 4, 0x90 |
| 395 | ; CHECK-NEXT: LBB0_20: ## %sw.bb256 |
| 396 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 397 | -; CHECK-NEXT: movl %r14d, %r13d |
| 398 | +; CHECK-NEXT: movl %ebp, %r12d |
| 399 | ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge |
| 400 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 401 | ; CHECK-NEXT: decl %r15d |
| 402 | ; CHECK-NEXT: testl %r15d, %r15d |
| 403 | -; CHECK-NEXT: movl %r13d, %r14d |
| 404 | +; CHECK-NEXT: movl %r12d, %ebp |
| 405 | ; CHECK-NEXT: jle LBB0_22 |
| 406 | ; CHECK-NEXT: LBB0_13: ## %while.body200 |
| 407 | ; CHECK-NEXT: ## =>This Loop Header: Depth=1 |
| 408 | ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 |
| 409 | ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 |
| 410 | -; CHECK-NEXT: leal -268(%r14), %eax |
| 411 | +; CHECK-NEXT: leal -268(%rbp), %eax |
| 412 | ; CHECK-NEXT: cmpl $105, %eax |
| 413 | ; CHECK-NEXT: ja LBB0_14 |
| 414 | ; CHECK-NEXT: ## %bb.56: ## %while.body200 |
| 415 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 416 | -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax |
| 417 | -; CHECK-NEXT: addq %rdi, %rax |
| 418 | +; CHECK-NEXT: movslq (%r13,%rax,4), %rax |
| 419 | +; CHECK-NEXT: addq %r13, %rax |
| 420 | ; CHECK-NEXT: jmpq *%rax |
| 421 | ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader |
| 422 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 423 | -; CHECK-NEXT: testb %dl, %dl |
| 424 | -; CHECK-NEXT: movl %r14d, %r13d |
| 425 | +; CHECK-NEXT: testb %bl, %bl |
| 426 | +; CHECK-NEXT: movl %ebp, %r12d |
| 427 | ; CHECK-NEXT: jne LBB0_21 |
| 428 | ; CHECK-NEXT: jmp LBB0_55 |
| 429 | ; CHECK-NEXT: .p2align 4, 0x90 |
| 430 | ; CHECK-NEXT: LBB0_14: ## %while.body200 |
| 431 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 432 | -; CHECK-NEXT: leal 1(%r14), %eax |
| 433 | +; CHECK-NEXT: leal 1(%rbp), %eax |
| 434 | ; CHECK-NEXT: cmpl $21, %eax |
| 435 | ; CHECK-NEXT: ja LBB0_20 |
| 436 | ; CHECK-NEXT: ## %bb.15: ## %while.body200 |
| 437 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 438 | -; CHECK-NEXT: movl $-1, %r13d |
| 439 | -; CHECK-NEXT: movslq (%rsi,%rax,4), %rax |
| 440 | -; CHECK-NEXT: addq %rsi, %rax |
| 441 | +; CHECK-NEXT: movl $-1, %r12d |
| 442 | +; CHECK-NEXT: leaq {{.*}}(%rip), %rcx |
| 443 | +; CHECK-NEXT: movslq (%rcx,%rax,4), %rax |
| 444 | +; CHECK-NEXT: addq %rcx, %rax |
| 445 | ; CHECK-NEXT: jmpq *%rax |
| 446 | ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader |
| 447 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 448 | -; CHECK-NEXT: movl $1, %r13d |
| 449 | +; CHECK-NEXT: movl $1, %r12d |
| 450 | ; CHECK-NEXT: jmp LBB0_21 |
| 451 | ; CHECK-NEXT: LBB0_26: ## %sw.bb474 |
| 452 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 453 | -; CHECK-NEXT: testb %dl, %dl |
| 454 | -; CHECK-NEXT: ## implicit-def: $r12 |
| 455 | +; CHECK-NEXT: testb %bl, %bl |
| 456 | +; CHECK-NEXT: ## implicit-def: $r14 |
| 457 | ; CHECK-NEXT: jne LBB0_34 |
| 458 | ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader |
| 459 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 460 | -; CHECK-NEXT: testb %dl, %dl |
| 461 | -; CHECK-NEXT: ## implicit-def: $r12 |
| 462 | +; CHECK-NEXT: testb %bl, %bl |
| 463 | +; CHECK-NEXT: ## implicit-def: $r14 |
| 464 | ; CHECK-NEXT: jne LBB0_34 |
| 465 | ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader |
| 466 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 467 | @@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 468 | ; CHECK-NEXT: .p2align 4, 0x90 |
| 469 | ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge |
| 470 | ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 |
| 471 | -; CHECK-NEXT: leaq 1(%r12), %rax |
| 472 | -; CHECK-NEXT: testb %dl, %dl |
| 473 | +; CHECK-NEXT: leaq 1(%r14), %rax |
| 474 | +; CHECK-NEXT: testb %bl, %bl |
| 475 | ; CHECK-NEXT: je LBB0_33 |
| 476 | ; CHECK-NEXT: ## %bb.29: ## %land.rhs485 |
| 477 | ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 |
| 478 | @@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 479 | ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 |
| 480 | ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 |
| 481 | ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 |
| 482 | -; CHECK-NEXT: movq %rax, %r12 |
| 483 | -; CHECK-NEXT: testb %dl, %dl |
| 484 | +; CHECK-NEXT: movq %rax, %r14 |
| 485 | +; CHECK-NEXT: testb %bl, %bl |
| 486 | ; CHECK-NEXT: jne LBB0_32 |
| 487 | ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 |
| 488 | ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 |
| 489 | ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 |
| 490 | ; CHECK-NEXT: callq ___maskrune |
| 491 | -; CHECK-NEXT: xorl %edx, %edx |
| 492 | -; CHECK-NEXT: testb %dl, %dl |
| 493 | +; CHECK-NEXT: testb %bl, %bl |
| 494 | ; CHECK-NEXT: jne LBB0_32 |
| 495 | ; CHECK-NEXT: jmp LBB0_34 |
| 496 | ; CHECK-NEXT: LBB0_45: ## %sw.bb1134 |
| 497 | @@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 498 | ; CHECK-NEXT: cmpq %rax, %rcx |
| 499 | ; CHECK-NEXT: jb LBB0_55 |
| 500 | ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 |
| 501 | -; CHECK-NEXT: xorl %ebp, %ebp |
| 502 | -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C |
| 503 | +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill |
| 504 | +; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C |
| 505 | ; CHECK-NEXT: jmp LBB0_21 |
| 506 | -; CHECK-NEXT: LBB0_19: ## %sw.bb243 |
| 507 | +; CHECK-NEXT: LBB0_40: ## %sw.bb566 |
| 508 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 509 | -; CHECK-NEXT: movl $2, %r13d |
| 510 | +; CHECK-NEXT: movl $20, %r12d |
| 511 | ; CHECK-NEXT: jmp LBB0_21 |
| 512 | -; CHECK-NEXT: LBB0_40: ## %sw.bb566 |
| 513 | +; CHECK-NEXT: LBB0_19: ## %sw.bb243 |
| 514 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 515 | -; CHECK-NEXT: movl $20, %r13d |
| 516 | +; CHECK-NEXT: movl $2, %r12d |
| 517 | ; CHECK-NEXT: jmp LBB0_21 |
| 518 | ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit |
| 519 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 520 | -; CHECK-NEXT: incq %r12 |
| 521 | +; CHECK-NEXT: incq %r14 |
| 522 | ; CHECK-NEXT: LBB0_34: ## %if.end517 |
| 523 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 524 | -; CHECK-NEXT: leal -324(%r13), %eax |
| 525 | +; CHECK-NEXT: leal -324(%r12), %eax |
| 526 | ; CHECK-NEXT: cmpl $59, %eax |
| 527 | ; CHECK-NEXT: ja LBB0_35 |
| 528 | ; CHECK-NEXT: ## %bb.57: ## %if.end517 |
| 529 | @@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 530 | ; CHECK-NEXT: jb LBB0_38 |
| 531 | ; CHECK-NEXT: LBB0_35: ## %if.end517 |
| 532 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 533 | -; CHECK-NEXT: cmpl $11, %r13d |
| 534 | +; CHECK-NEXT: cmpl $11, %r12d |
| 535 | ; CHECK-NEXT: je LBB0_38 |
| 536 | ; CHECK-NEXT: ## %bb.36: ## %if.end517 |
| 537 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 538 | -; CHECK-NEXT: cmpl $24, %r13d |
| 539 | +; CHECK-NEXT: cmpl $24, %r12d |
| 540 | ; CHECK-NEXT: je LBB0_38 |
| 541 | ; CHECK-NEXT: ## %bb.37: ## %if.then532 |
| 542 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 543 | @@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 544 | ; CHECK-NEXT: LBB0_38: ## %for.cond534 |
| 545 | ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 |
| 546 | ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 |
| 547 | -; CHECK-NEXT: testb %dl, %dl |
| 548 | +; CHECK-NEXT: testb %bl, %bl |
| 549 | ; CHECK-NEXT: jne LBB0_38 |
| 550 | ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader |
| 551 | ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 |
| 552 | -; CHECK-NEXT: testb %dl, %dl |
| 553 | -; CHECK-NEXT: movb $0, (%r12) |
| 554 | -; CHECK-NEXT: movl %r14d, %r13d |
| 555 | -; CHECK-NEXT: leaq {{.*}}(%rip), %rsi |
| 556 | -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi |
| 557 | +; CHECK-NEXT: testb %bl, %bl |
| 558 | +; CHECK-NEXT: movb $0, (%r14) |
| 559 | +; CHECK-NEXT: movl %ebp, %r12d |
| 560 | +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload |
| 561 | ; CHECK-NEXT: jmp LBB0_21 |
| 562 | ; CHECK-NEXT: .p2align 4, 0x90 |
| 563 | ; CHECK-NEXT: LBB0_42: ## %while.cond864 |
| 564 | @@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 565 | ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 566 | ; CHECK-NEXT: jmp LBB0_25 |
| 567 | ; CHECK-NEXT: LBB0_11: |
| 568 | -; CHECK-NEXT: xorl %ebp, %ebp |
| 569 | -; CHECK-NEXT: xorl %r13d, %r13d |
| 570 | +; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill |
| 571 | +; CHECK-NEXT: xorl %r12d, %r12d |
| 572 | ; CHECK-NEXT: LBB0_22: ## %while.end1465 |
| 573 | -; CHECK-NEXT: incl %r13d |
| 574 | -; CHECK-NEXT: cmpl $16, %r13d |
| 575 | +; CHECK-NEXT: incl %r12d |
| 576 | +; CHECK-NEXT: cmpl $16, %r12d |
| 577 | ; CHECK-NEXT: ja LBB0_50 |
| 578 | ; CHECK-NEXT: ## %bb.23: ## %while.end1465 |
| 579 | ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801 |
| 580 | -; CHECK-NEXT: btl %r13d, %eax |
| 581 | +; CHECK-NEXT: btl %r12d, %eax |
| 582 | ; CHECK-NEXT: jae LBB0_50 |
| 583 | ; CHECK-NEXT: ## %bb.24: |
| 584 | -; CHECK-NEXT: xorl %ebp, %ebp |
| 585 | -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload |
| 586 | +; CHECK-NEXT: xorl %ebx, %ebx |
| 587 | ; CHECK-NEXT: LBB0_48: ## %if.then1477 |
| 588 | ; CHECK-NEXT: movl $1, %edx |
| 589 | ; CHECK-NEXT: callq _write |
| 590 | -; CHECK-NEXT: subq %rbp, %rbx |
| 591 | +; CHECK-NEXT: subq %rbx, %r14 |
| 592 | ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax |
| 593 | -; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax |
| 594 | +; CHECK-NEXT: leaq 8189(%r14,%rax), %rax |
| 595 | ; CHECK-NEXT: .p2align 4, 0x90 |
| 596 | ; CHECK-NEXT: LBB0_49: ## %for.body1723 |
| 597 | ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 598 | ; CHECK-NEXT: decq %rax |
| 599 | ; CHECK-NEXT: jmp LBB0_49 |
| 600 | +; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit |
| 601 | +; CHECK-NEXT: movq %r14, %rbx |
| 602 | +; CHECK-NEXT: jmp LBB0_48 |
| 603 | +; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader |
| 604 | +; CHECK-NEXT: xorl %eax, %eax |
| 605 | +; CHECK-NEXT: testb %al, %al |
| 606 | +; CHECK-NEXT: je LBB0_41 |
| 607 | +; CHECK-NEXT: .p2align 4, 0x90 |
| 608 | +; CHECK-NEXT: LBB0_17: ## %for.body643.us |
| 609 | +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 610 | +; CHECK-NEXT: jmp LBB0_17 |
| 611 | +; CHECK-NEXT: .p2align 4, 0x90 |
| 612 | +; CHECK-NEXT: LBB0_41: ## %while.cond661 |
| 613 | +; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 614 | +; CHECK-NEXT: jmp LBB0_41 |
| 615 | ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader |
| 616 | ; CHECK-NEXT: movl $512, %eax ## imm = 0x200 |
| 617 | ; CHECK-NEXT: cmpq %rax, %rax |
| 618 | @@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 619 | ; CHECK-NEXT: testb %al, %al |
| 620 | ; CHECK-NEXT: jne LBB0_54 |
| 621 | ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader |
| 622 | -; CHECK-NEXT: incl %ebp |
| 623 | -; CHECK-NEXT: .p2align 4, 0x90 |
| 624 | +; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill |
| 625 | ; CHECK-NEXT: LBB0_53: ## %while.body1679 |
| 626 | ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 627 | -; CHECK-NEXT: movq (%rbx), %rdi |
| 628 | +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload |
| 629 | +; CHECK-NEXT: movq (%rax), %rdi |
| 630 | ; CHECK-NEXT: callq _fileno |
| 631 | -; CHECK-NEXT: movslq %ebp, %rax |
| 632 | -; CHECK-NEXT: leal 1(%rax), %ebp |
| 633 | +; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload |
| 634 | +; CHECK-NEXT: leal 1(%rax), %ecx |
| 635 | +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill |
| 636 | ; CHECK-NEXT: cmpq %rax, %rax |
| 637 | ; CHECK-NEXT: jl LBB0_53 |
| 638 | ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader |
| 639 | @@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { |
| 640 | ; CHECK-NEXT: testb %al, %al |
| 641 | ; CHECK-NEXT: LBB0_55: ## %if.then.i |
| 642 | ; CHECK-NEXT: ud2 |
| 643 | -; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit |
| 644 | -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload |
| 645 | -; CHECK-NEXT: movq %rbx, %rbp |
| 646 | -; CHECK-NEXT: jmp LBB0_48 |
| 647 | -; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader |
| 648 | -; CHECK-NEXT: xorl %eax, %eax |
| 649 | -; CHECK-NEXT: testb %al, %al |
| 650 | -; CHECK-NEXT: je LBB0_41 |
| 651 | -; CHECK-NEXT: .p2align 4, 0x90 |
| 652 | -; CHECK-NEXT: LBB0_17: ## %for.body643.us |
| 653 | -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 654 | -; CHECK-NEXT: jmp LBB0_17 |
| 655 | -; CHECK-NEXT: .p2align 4, 0x90 |
| 656 | -; CHECK-NEXT: LBB0_41: ## %while.cond661 |
| 657 | -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 |
| 658 | -; CHECK-NEXT: jmp LBB0_41 |
| 659 | entry: |
| 660 | %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64 |
| 661 | %old = alloca [512 x i8], align 16 |