blob: 5b5887904a9dab1fcba20ae86bb6c41735ab035f [file] [log] [blame]
Taewook Oh2da205d2019-12-02 18:15:221diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
2index c8965936fb9..41d6c23b8d0 100644
3--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
4+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
5@@ -34,6 +34,7 @@ namespace llvm {
6 class Function;
7 class LoopInfo;
8 class raw_ostream;
9+class PostDominatorTree;
10 class TargetLibraryInfo;
11 class Value;
12
13@@ -187,8 +188,10 @@ private:
14 /// Track the set of blocks that always lead to a cold call.
15 SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
16
17- void updatePostDominatedByUnreachable(const BasicBlock *BB);
18- void updatePostDominatedByColdCall(const BasicBlock *BB);
19+ void computePostDominatedByUnreachable(const Function &F,
20+ PostDominatorTree *PDT);
21+ void computePostDominatedByColdCall(const Function &F,
22+ PostDominatorTree *PDT);
23 bool calcUnreachableHeuristics(const BasicBlock *BB);
24 bool calcMetadataWeights(const BasicBlock *BB);
25 bool calcColdCallHeuristics(const BasicBlock *BB);
26diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
27index 7bd237b9ad5..ffba65b5ed5 100644
28--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
29+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
30@@ -16,6 +16,7 @@
31 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/Analysis/LoopInfo.h"
34+#include "llvm/Analysis/PostDominators.h"
35 #include "llvm/Analysis/TargetLibraryInfo.h"
36 #include "llvm/IR/Attributes.h"
37 #include "llvm/IR/BasicBlock.h"
38@@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
39 /// instruction. This is essentially never taken.
40 static const uint32_t IH_NONTAKEN_WEIGHT = 1;
41
42-/// Add \p BB to PostDominatedByUnreachable set if applicable.
43-void
44-BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
45- const Instruction *TI = BB->getTerminator();
46- if (TI->getNumSuccessors() == 0) {
47- if (isa<UnreachableInst>(TI) ||
48- // If this block is terminated by a call to
49- // @llvm.experimental.deoptimize then treat it like an unreachable since
50- // the @llvm.experimental.deoptimize call is expected to practically
51- // never execute.
52- BB->getTerminatingDeoptimizeCall())
53- PostDominatedByUnreachable.insert(BB);
54- return;
55- }
56+static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
57+ SmallVectorImpl<const BasicBlock *> &WorkList,
58+ SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
59+ SmallVector<BasicBlock *, 8> Descendants;
60+ SmallPtrSet<const BasicBlock *, 16> NewItems;
61+
62+ PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
63+ for (auto *BB : Descendants)
64+ if (TargetSet.insert(BB).second)
65+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
66+ if (!TargetSet.count(*PI))
67+ NewItems.insert(*PI);
68+ WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
69+}
70
71- // If the terminator is an InvokeInst, check only the normal destination block
72- // as the unwind edge of InvokeInst is also very unlikely taken.
73- if (auto *II = dyn_cast<InvokeInst>(TI)) {
74- if (PostDominatedByUnreachable.count(II->getNormalDest()))
75- PostDominatedByUnreachable.insert(BB);
76- return;
77+/// Compute a set of basic blocks that are post-dominated by unreachables.
78+void BranchProbabilityInfo::computePostDominatedByUnreachable(
79+ const Function &F, PostDominatorTree *PDT) {
80+ SmallVector<const BasicBlock *, 8> WorkList;
81+ for (auto &BB : F) {
82+ const Instruction *TI = BB.getTerminator();
83+ if (TI->getNumSuccessors() == 0) {
84+ if (isa<UnreachableInst>(TI) ||
85+ // If this block is terminated by a call to
86+ // @llvm.experimental.deoptimize then treat it like an unreachable
87+ // since the @llvm.experimental.deoptimize call is expected to
88+ // practically never execute.
89+ BB.getTerminatingDeoptimizeCall())
90+ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
91+ }
92 }
93
94- for (auto *I : successors(BB))
95- // If any of successor is not post dominated then BB is also not.
96- if (!PostDominatedByUnreachable.count(I))
97- return;
98-
99- PostDominatedByUnreachable.insert(BB);
100+ while (!WorkList.empty()) {
101+ const BasicBlock *BB = WorkList.pop_back_val();
102+ if (PostDominatedByUnreachable.count(BB))
103+ continue;
104+ // If the terminator is an InvokeInst, check only the normal destination
105+ // block as the unwind edge of InvokeInst is also very unlikely to be taken.
106+ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
107+ if (PostDominatedByUnreachable.count(II->getNormalDest()))
108+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
109+ }
110+ // If all successors are post-dominated by unreachable, so is BB.
111+ else if (!successors(BB).empty() &&
112+ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
113+ return PostDominatedByUnreachable.count(Succ);
114+ }))
115+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
116+ }
117 }
118
119-/// Add \p BB to PostDominatedByColdCall set if applicable.
120-void
121-BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
122- assert(!PostDominatedByColdCall.count(BB));
123- const Instruction *TI = BB->getTerminator();
124- if (TI->getNumSuccessors() == 0)
125- return;
126+/// Compute a set of basic blocks that are post-dominated by cold calls.
127+void BranchProbabilityInfo::computePostDominatedByColdCall(
128+ const Function &F, PostDominatorTree *PDT) {
129+ SmallVector<const BasicBlock *, 8> WorkList;
130+ for (auto &BB : F)
131+ for (auto &I : BB)
132+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
133+ if (CI->hasFnAttr(Attribute::Cold))
134+ UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
135
136- // If all of successor are post dominated then BB is also done.
137- if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
138- return PostDominatedByColdCall.count(SuccBB);
139- })) {
140- PostDominatedByColdCall.insert(BB);
141- return;
142- }
143+ while (!WorkList.empty()) {
144+ const BasicBlock *BB = WorkList.pop_back_val();
145
146- // If the terminator is an InvokeInst, check only the normal destination
147- // block as the unwind edge of InvokeInst is also very unlikely taken.
148- if (auto *II = dyn_cast<InvokeInst>(TI))
149- if (PostDominatedByColdCall.count(II->getNormalDest())) {
150- PostDominatedByColdCall.insert(BB);
151- return;
152+ // If the terminator is an InvokeInst, check only the normal destination
153+ // block as the unwind edge of InvokeInst is also very unlikely to be taken.
154+ if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
155+ if (PostDominatedByColdCall.count(II->getNormalDest()))
156+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
157 }
158-
159- // Otherwise, if the block itself contains a cold function, add it to the
160- // set of blocks post-dominated by a cold call.
161- for (auto &I : *BB)
162- if (const CallInst *CI = dyn_cast<CallInst>(&I))
163- if (CI->hasFnAttr(Attribute::Cold)) {
164- PostDominatedByColdCall.insert(BB);
165- return;
166- }
167+ // If all successors are post-dominated by a cold call, so is BB.
168+ else if (!successors(BB).empty() &&
169+ llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
170+ return PostDominatedByColdCall.count(Succ);
171+ }))
172+ UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
173+ }
174 }
175
176 /// Calculate edge weights for successors lead to unreachable.
177@@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
178 LLVM_DEBUG(dbgs() << "\n");
179 }
180
181+ std::unique_ptr<PostDominatorTree> PDT =
182+ std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
183+ computePostDominatedByUnreachable(F, PDT.get());
184+ computePostDominatedByColdCall(F, PDT.get());
185+
186 // Walk the basic blocks in post-order so that we can build up state about
187 // the successors of a block iteratively.
188 for (auto BB : post_order(&F.getEntryBlock())) {
189 LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
190 << "\n");
191- updatePostDominatedByUnreachable(BB);
192- updatePostDominatedByColdCall(BB);
193 // If there is no at least two successors, no sense to set probability.
194 if (BB->getTerminator()->getNumSuccessors() < 2)
195 continue;
196diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
197index 64e0a82456f..8212cc47690 100644
198--- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
199+++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
200@@ -141,6 +141,24 @@ exit:
201 ret i32 %result
202 }
203
204+define i32 @test_cold_loop(i32 %a, i32 %b) {
205+entry:
206+ %cond1 = icmp eq i32 %a, 42
207+ br i1 %cond1, label %header, label %exit
208+
209+header:
210+ br label %body
211+
212+body:
213+ %cond2 = icmp eq i32 %b, 42
214+ br i1 %cond2, label %header, label %exit
215+; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
216+
217+exit:
218+ call void @coldfunc()
219+ ret i32 %b
220+}
221+
222 declare i32 @regular_function(i32 %i)
223
224 define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) {
225diff --git a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
226index 0566ca16c2f..6e01afd2cfc 100644
227--- a/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
228+++ b/llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll
229@@ -79,6 +79,32 @@ exit:
230 ret i32 %b
231 }
232
233+define i32 @test4(i32 %a, i32 %b) {
234+; CHECK: Printing analysis {{.*}} for function 'test4'
235+; Make sure we handle loops post-dominated by unreachables.
236+entry:
237+ %cond1 = icmp eq i32 %a, 42
238+ br i1 %cond1, label %header, label %exit
239+; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00%
240+; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]
241+
242+header:
243+ br label %body
244+
245+body:
246+ %cond2 = icmp eq i32 %a, 42
247+ br i1 %cond2, label %header, label %abort
248+; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
249+; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00%
250+
251+abort:
252+ call void @abort() noreturn
253+ unreachable
254+
255+exit:
256+ ret i32 %b
257+}
258+
259 @_ZTIi = external global i8*
260
261 ; CHECK-LABEL: throwSmallException
262diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll
263index 883d26b6690..a859121bb50 100644
264--- a/llvm/test/CodeGen/PowerPC/pr36292.ll
265+++ b/llvm/test/CodeGen/PowerPC/pr36292.ll
266@@ -15,8 +15,7 @@ define void @test() nounwind comdat {
267 ; CHECK-NEXT: ld 29, 0(3)
268 ; CHECK-NEXT: ld 30, 32(1)
269 ; CHECK-NEXT: cmpld 30, 29
270-; CHECK-NEXT: bge- 0, .LBB0_2
271-; CHECK-NEXT: .p2align 5
272+; CHECK-NEXT: bge 0, .LBB0_2
273 ; CHECK-NEXT: .LBB0_1: # %bounds.ok
274 ; CHECK-NEXT: #
275 ; CHECK-NEXT: lfsx 2, 0, 3
276@@ -26,7 +25,7 @@ define void @test() nounwind comdat {
277 ; CHECK-NEXT: addi 30, 30, 1
278 ; CHECK-NEXT: stfsx 1, 0, 3
279 ; CHECK-NEXT: cmpld 30, 29
280-; CHECK-NEXT: blt+ 0, .LBB0_1
281+; CHECK-NEXT: blt 0, .LBB0_1
282 ; CHECK-NEXT: .LBB0_2: # %bounds.fail
283 ; CHECK-NEXT: std 30, 32(1)
284 %pos = alloca i64, align 8
285diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
286index 8fdcd1eac45..7804b0a3f09 100644
287--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
288+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
289@@ -44,7 +44,6 @@ define void @print_res() nounwind {
290 ; CHECK-NEXT: lbz 5, 0(5)
291 ; CHECK-NEXT: addi 3, 3, 1
292 ; CHECK-NEXT: bdz .LBB0_4
293-; CHECK-NEXT: .p2align 4
294 ; CHECK-NEXT: .LBB0_3: #
295 ; CHECK-NEXT: clrldi 10, 8, 32
296 ; CHECK-NEXT: cntlzw 9, 6
297diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
298index acc4b7e1381..258cc2031ae 100644
299--- a/llvm/test/CodeGen/X86/block-placement.ll
300+++ b/llvm/test/CodeGen/X86/block-placement.ll
301@@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) {
302 ; CHECK: %loop.header
303 ; CHECK: %loop.body1
304 ; CHECK: %loop.body2
305+; CHECK: %loop.body3
306+; CHECK: %loop.inner1.begin
307 ; CHECK: %loop.body4
308 ; CHECK: %loop.inner2.begin
309 ; CHECK: %loop.inner2.begin
310-; CHECK: %loop.body3
311-; CHECK: %loop.inner1.begin
312 ; CHECK: %bail
313
314 entry:
315diff --git a/llvm/test/CodeGen/X86/pr37916.ll b/llvm/test/CodeGen/X86/pr37916.ll
316index 2da9413a9a0..484104da9ff 100644
317--- a/llvm/test/CodeGen/X86/pr37916.ll
318+++ b/llvm/test/CodeGen/X86/pr37916.ll
319@@ -7,7 +7,6 @@
320 define void @fn1() local_unnamed_addr {
321 ; CHECK-LABEL: fn1:
322 ; CHECK: # %bb.0: # %entry
323-; CHECK-NEXT: .p2align 4, 0x90
324 ; CHECK-NEXT: .LBB0_1: # %if.end
325 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
326 ; CHECK-NEXT: movl a+4, %eax
327diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
328index 9238ab0bf89..92708d33924 100644
329--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
330+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
331@@ -29,8 +29,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
332 ; CHECK-NEXT: .cfi_def_cfa_offset 48
333 ; CHECK-NEXT: pushq %rbx
334 ; CHECK-NEXT: .cfi_def_cfa_offset 56
335-; CHECK-NEXT: subq $536, %rsp ## imm = 0x218
336-; CHECK-NEXT: .cfi_def_cfa_offset 592
337+; CHECK-NEXT: subq $552, %rsp ## imm = 0x228
338+; CHECK-NEXT: .cfi_def_cfa_offset 608
339 ; CHECK-NEXT: .cfi_offset %rbx, -56
340 ; CHECK-NEXT: .cfi_offset %r12, -48
341 ; CHECK-NEXT: .cfi_offset %r13, -40
342@@ -54,7 +54,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
343 ; CHECK-NEXT: testb %al, %al
344 ; CHECK-NEXT: je LBB0_55
345 ; CHECK-NEXT: LBB0_4: ## %cleanup
346-; CHECK-NEXT: addq $536, %rsp ## imm = 0x218
347+; CHECK-NEXT: addq $552, %rsp ## imm = 0x228
348 ; CHECK-NEXT: popq %rbx
349 ; CHECK-NEXT: popq %r12
350 ; CHECK-NEXT: popq %r13
351@@ -68,7 +68,7 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
352 ; CHECK-NEXT: je LBB0_55
353 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720
354 ; CHECK-NEXT: movq %rdx, %rbx
355-; CHECK-NEXT: movq %rdi, %rbp
356+; CHECK-NEXT: movq %rdi, %r14
357 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
358 ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx
359 ; CHECK-NEXT: cmpq %rax, %rcx
360@@ -78,10 +78,10 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
361 ; CHECK-NEXT: movl $32, %esi
362 ; CHECK-NEXT: callq _memset
363 ; CHECK-NEXT: LBB0_8: ## %while.body.preheader
364-; CHECK-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
365 ; CHECK-NEXT: imulq $1040, %rbx, %rax ## imm = 0x410
366 ; CHECK-NEXT: movq _syBuf@{{.*}}(%rip), %rcx
367-; CHECK-NEXT: leaq 8(%rcx,%rax), %rbx
368+; CHECK-NEXT: leaq 8(%rcx,%rax), %rax
369+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
370 ; CHECK-NEXT: movl $1, %r15d
371 ; CHECK-NEXT: movq _syCTRO@{{.*}}(%rip), %rax
372 ; CHECK-NEXT: movb $1, %cl
373@@ -92,69 +92,70 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
374 ; CHECK-NEXT: testb %cl, %cl
375 ; CHECK-NEXT: jne LBB0_9
376 ; CHECK-NEXT: ## %bb.10: ## %do.end
377-; CHECK-NEXT: xorl %r14d, %r14d
378-; CHECK-NEXT: testb %r14b, %r14b
379+; CHECK-NEXT: xorl %ebp, %ebp
380+; CHECK-NEXT: testb %bpl, %bpl
381 ; CHECK-NEXT: jne LBB0_11
382 ; CHECK-NEXT: ## %bb.12: ## %while.body200.preheader
383-; CHECK-NEXT: xorl %edx, %edx
384-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
385-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
386-; CHECK-NEXT: xorl %ebp, %ebp
387-; CHECK-NEXT: xorl %r13d, %r13d
388+; CHECK-NEXT: xorl %ebx, %ebx
389+; CHECK-NEXT: leaq {{.*}}(%rip), %r13
390+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
391+; CHECK-NEXT: xorl %r12d, %r12d
392+; CHECK-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
393 ; CHECK-NEXT: jmp LBB0_13
394 ; CHECK-NEXT: .p2align 4, 0x90
395 ; CHECK-NEXT: LBB0_20: ## %sw.bb256
396 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
397-; CHECK-NEXT: movl %r14d, %r13d
398+; CHECK-NEXT: movl %ebp, %r12d
399 ; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge
400 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
401 ; CHECK-NEXT: decl %r15d
402 ; CHECK-NEXT: testl %r15d, %r15d
403-; CHECK-NEXT: movl %r13d, %r14d
404+; CHECK-NEXT: movl %r12d, %ebp
405 ; CHECK-NEXT: jle LBB0_22
406 ; CHECK-NEXT: LBB0_13: ## %while.body200
407 ; CHECK-NEXT: ## =>This Loop Header: Depth=1
408 ; CHECK-NEXT: ## Child Loop BB0_30 Depth 2
409 ; CHECK-NEXT: ## Child Loop BB0_38 Depth 2
410-; CHECK-NEXT: leal -268(%r14), %eax
411+; CHECK-NEXT: leal -268(%rbp), %eax
412 ; CHECK-NEXT: cmpl $105, %eax
413 ; CHECK-NEXT: ja LBB0_14
414 ; CHECK-NEXT: ## %bb.56: ## %while.body200
415 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
416-; CHECK-NEXT: movslq (%rdi,%rax,4), %rax
417-; CHECK-NEXT: addq %rdi, %rax
418+; CHECK-NEXT: movslq (%r13,%rax,4), %rax
419+; CHECK-NEXT: addq %r13, %rax
420 ; CHECK-NEXT: jmpq *%rax
421 ; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader
422 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
423-; CHECK-NEXT: testb %dl, %dl
424-; CHECK-NEXT: movl %r14d, %r13d
425+; CHECK-NEXT: testb %bl, %bl
426+; CHECK-NEXT: movl %ebp, %r12d
427 ; CHECK-NEXT: jne LBB0_21
428 ; CHECK-NEXT: jmp LBB0_55
429 ; CHECK-NEXT: .p2align 4, 0x90
430 ; CHECK-NEXT: LBB0_14: ## %while.body200
431 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
432-; CHECK-NEXT: leal 1(%r14), %eax
433+; CHECK-NEXT: leal 1(%rbp), %eax
434 ; CHECK-NEXT: cmpl $21, %eax
435 ; CHECK-NEXT: ja LBB0_20
436 ; CHECK-NEXT: ## %bb.15: ## %while.body200
437 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
438-; CHECK-NEXT: movl $-1, %r13d
439-; CHECK-NEXT: movslq (%rsi,%rax,4), %rax
440-; CHECK-NEXT: addq %rsi, %rax
441+; CHECK-NEXT: movl $-1, %r12d
442+; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
443+; CHECK-NEXT: movslq (%rcx,%rax,4), %rax
444+; CHECK-NEXT: addq %rcx, %rax
445 ; CHECK-NEXT: jmpq *%rax
446 ; CHECK-NEXT: LBB0_18: ## %while.cond201.preheader
447 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
448-; CHECK-NEXT: movl $1, %r13d
449+; CHECK-NEXT: movl $1, %r12d
450 ; CHECK-NEXT: jmp LBB0_21
451 ; CHECK-NEXT: LBB0_26: ## %sw.bb474
452 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
453-; CHECK-NEXT: testb %dl, %dl
454-; CHECK-NEXT: ## implicit-def: $r12
455+; CHECK-NEXT: testb %bl, %bl
456+; CHECK-NEXT: ## implicit-def: $r14
457 ; CHECK-NEXT: jne LBB0_34
458 ; CHECK-NEXT: ## %bb.27: ## %do.body479.preheader
459 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
460-; CHECK-NEXT: testb %dl, %dl
461-; CHECK-NEXT: ## implicit-def: $r12
462+; CHECK-NEXT: testb %bl, %bl
463+; CHECK-NEXT: ## implicit-def: $r14
464 ; CHECK-NEXT: jne LBB0_34
465 ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader
466 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
467@@ -165,8 +166,8 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
468 ; CHECK-NEXT: .p2align 4, 0x90
469 ; CHECK-NEXT: LBB0_32: ## %do.body479.backedge
470 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
471-; CHECK-NEXT: leaq 1(%r12), %rax
472-; CHECK-NEXT: testb %dl, %dl
473+; CHECK-NEXT: leaq 1(%r14), %rax
474+; CHECK-NEXT: testb %bl, %bl
475 ; CHECK-NEXT: je LBB0_33
476 ; CHECK-NEXT: ## %bb.29: ## %land.rhs485
477 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
478@@ -175,15 +176,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
479 ; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780
480 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
481 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
482-; CHECK-NEXT: movq %rax, %r12
483-; CHECK-NEXT: testb %dl, %dl
484+; CHECK-NEXT: movq %rax, %r14
485+; CHECK-NEXT: testb %bl, %bl
486 ; CHECK-NEXT: jne LBB0_32
487 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500
488 ; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2
489 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100
490 ; CHECK-NEXT: callq ___maskrune
491-; CHECK-NEXT: xorl %edx, %edx
492-; CHECK-NEXT: testb %dl, %dl
493+; CHECK-NEXT: testb %bl, %bl
494 ; CHECK-NEXT: jne LBB0_32
495 ; CHECK-NEXT: jmp LBB0_34
496 ; CHECK-NEXT: LBB0_45: ## %sw.bb1134
497@@ -193,23 +193,23 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
498 ; CHECK-NEXT: cmpq %rax, %rcx
499 ; CHECK-NEXT: jb LBB0_55
500 ; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1
501-; CHECK-NEXT: xorl %ebp, %ebp
502-; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C
503+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
504+; CHECK-NEXT: movl $268, %r12d ## imm = 0x10C
505 ; CHECK-NEXT: jmp LBB0_21
506-; CHECK-NEXT: LBB0_19: ## %sw.bb243
507+; CHECK-NEXT: LBB0_40: ## %sw.bb566
508 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
509-; CHECK-NEXT: movl $2, %r13d
510+; CHECK-NEXT: movl $20, %r12d
511 ; CHECK-NEXT: jmp LBB0_21
512-; CHECK-NEXT: LBB0_40: ## %sw.bb566
513+; CHECK-NEXT: LBB0_19: ## %sw.bb243
514 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
515-; CHECK-NEXT: movl $20, %r13d
516+; CHECK-NEXT: movl $2, %r12d
517 ; CHECK-NEXT: jmp LBB0_21
518 ; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit
519 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
520-; CHECK-NEXT: incq %r12
521+; CHECK-NEXT: incq %r14
522 ; CHECK-NEXT: LBB0_34: ## %if.end517
523 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
524-; CHECK-NEXT: leal -324(%r13), %eax
525+; CHECK-NEXT: leal -324(%r12), %eax
526 ; CHECK-NEXT: cmpl $59, %eax
527 ; CHECK-NEXT: ja LBB0_35
528 ; CHECK-NEXT: ## %bb.57: ## %if.end517
529@@ -219,11 +219,11 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
530 ; CHECK-NEXT: jb LBB0_38
531 ; CHECK-NEXT: LBB0_35: ## %if.end517
532 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
533-; CHECK-NEXT: cmpl $11, %r13d
534+; CHECK-NEXT: cmpl $11, %r12d
535 ; CHECK-NEXT: je LBB0_38
536 ; CHECK-NEXT: ## %bb.36: ## %if.end517
537 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
538-; CHECK-NEXT: cmpl $24, %r13d
539+; CHECK-NEXT: cmpl $24, %r12d
540 ; CHECK-NEXT: je LBB0_38
541 ; CHECK-NEXT: ## %bb.37: ## %if.then532
542 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
543@@ -233,15 +233,14 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
544 ; CHECK-NEXT: LBB0_38: ## %for.cond534
545 ; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1
546 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=2
547-; CHECK-NEXT: testb %dl, %dl
548+; CHECK-NEXT: testb %bl, %bl
549 ; CHECK-NEXT: jne LBB0_38
550 ; CHECK-NEXT: ## %bb.39: ## %for.cond542.preheader
551 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1
552-; CHECK-NEXT: testb %dl, %dl
553-; CHECK-NEXT: movb $0, (%r12)
554-; CHECK-NEXT: movl %r14d, %r13d
555-; CHECK-NEXT: leaq {{.*}}(%rip), %rsi
556-; CHECK-NEXT: leaq {{.*}}(%rip), %rdi
557+; CHECK-NEXT: testb %bl, %bl
558+; CHECK-NEXT: movb $0, (%r14)
559+; CHECK-NEXT: movl %ebp, %r12d
560+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 ## 8-byte Reload
561 ; CHECK-NEXT: jmp LBB0_21
562 ; CHECK-NEXT: .p2align 4, 0x90
563 ; CHECK-NEXT: LBB0_42: ## %while.cond864
564@@ -256,30 +255,44 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
565 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
566 ; CHECK-NEXT: jmp LBB0_25
567 ; CHECK-NEXT: LBB0_11:
568-; CHECK-NEXT: xorl %ebp, %ebp
569-; CHECK-NEXT: xorl %r13d, %r13d
570+; CHECK-NEXT: movl $0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
571+; CHECK-NEXT: xorl %r12d, %r12d
572 ; CHECK-NEXT: LBB0_22: ## %while.end1465
573-; CHECK-NEXT: incl %r13d
574-; CHECK-NEXT: cmpl $16, %r13d
575+; CHECK-NEXT: incl %r12d
576+; CHECK-NEXT: cmpl $16, %r12d
577 ; CHECK-NEXT: ja LBB0_50
578 ; CHECK-NEXT: ## %bb.23: ## %while.end1465
579 ; CHECK-NEXT: movl $83969, %eax ## imm = 0x14801
580-; CHECK-NEXT: btl %r13d, %eax
581+; CHECK-NEXT: btl %r12d, %eax
582 ; CHECK-NEXT: jae LBB0_50
583 ; CHECK-NEXT: ## %bb.24:
584-; CHECK-NEXT: xorl %ebp, %ebp
585-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
586+; CHECK-NEXT: xorl %ebx, %ebx
587 ; CHECK-NEXT: LBB0_48: ## %if.then1477
588 ; CHECK-NEXT: movl $1, %edx
589 ; CHECK-NEXT: callq _write
590-; CHECK-NEXT: subq %rbp, %rbx
591+; CHECK-NEXT: subq %rbx, %r14
592 ; CHECK-NEXT: movq _syHistory@{{.*}}(%rip), %rax
593-; CHECK-NEXT: leaq 8189(%rbx,%rax), %rax
594+; CHECK-NEXT: leaq 8189(%r14,%rax), %rax
595 ; CHECK-NEXT: .p2align 4, 0x90
596 ; CHECK-NEXT: LBB0_49: ## %for.body1723
597 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
598 ; CHECK-NEXT: decq %rax
599 ; CHECK-NEXT: jmp LBB0_49
600+; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
601+; CHECK-NEXT: movq %r14, %rbx
602+; CHECK-NEXT: jmp LBB0_48
603+; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
604+; CHECK-NEXT: xorl %eax, %eax
605+; CHECK-NEXT: testb %al, %al
606+; CHECK-NEXT: je LBB0_41
607+; CHECK-NEXT: .p2align 4, 0x90
608+; CHECK-NEXT: LBB0_17: ## %for.body643.us
609+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
610+; CHECK-NEXT: jmp LBB0_17
611+; CHECK-NEXT: .p2align 4, 0x90
612+; CHECK-NEXT: LBB0_41: ## %while.cond661
613+; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
614+; CHECK-NEXT: jmp LBB0_41
615 ; CHECK-NEXT: LBB0_50: ## %for.cond1480.preheader
616 ; CHECK-NEXT: movl $512, %eax ## imm = 0x200
617 ; CHECK-NEXT: cmpq %rax, %rax
618@@ -289,14 +302,15 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
619 ; CHECK-NEXT: testb %al, %al
620 ; CHECK-NEXT: jne LBB0_54
621 ; CHECK-NEXT: ## %bb.52: ## %while.body1679.preheader
622-; CHECK-NEXT: incl %ebp
623-; CHECK-NEXT: .p2align 4, 0x90
624+; CHECK-NEXT: incl {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
625 ; CHECK-NEXT: LBB0_53: ## %while.body1679
626 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
627-; CHECK-NEXT: movq (%rbx), %rdi
628+; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 8-byte Reload
629+; CHECK-NEXT: movq (%rax), %rdi
630 ; CHECK-NEXT: callq _fileno
631-; CHECK-NEXT: movslq %ebp, %rax
632-; CHECK-NEXT: leal 1(%rax), %ebp
633+; CHECK-NEXT: movslq {{[-0-9]+}}(%r{{[sb]}}p), %rax ## 4-byte Folded Reload
634+; CHECK-NEXT: leal 1(%rax), %ecx
635+; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
636 ; CHECK-NEXT: cmpq %rax, %rax
637 ; CHECK-NEXT: jl LBB0_53
638 ; CHECK-NEXT: LBB0_54: ## %while.cond1683.preheader
639@@ -304,22 +318,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) {
640 ; CHECK-NEXT: testb %al, %al
641 ; CHECK-NEXT: LBB0_55: ## %if.then.i
642 ; CHECK-NEXT: ud2
643-; CHECK-NEXT: LBB0_47: ## %if.then1477.loopexit
644-; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx ## 8-byte Reload
645-; CHECK-NEXT: movq %rbx, %rbp
646-; CHECK-NEXT: jmp LBB0_48
647-; CHECK-NEXT: LBB0_16: ## %while.cond635.preheader
648-; CHECK-NEXT: xorl %eax, %eax
649-; CHECK-NEXT: testb %al, %al
650-; CHECK-NEXT: je LBB0_41
651-; CHECK-NEXT: .p2align 4, 0x90
652-; CHECK-NEXT: LBB0_17: ## %for.body643.us
653-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
654-; CHECK-NEXT: jmp LBB0_17
655-; CHECK-NEXT: .p2align 4, 0x90
656-; CHECK-NEXT: LBB0_41: ## %while.cond661
657-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
658-; CHECK-NEXT: jmp LBB0_41
659 entry:
660 %sub.ptr.rhs.cast646 = ptrtoint i8* %line to i64
661 %old = alloca [512 x i8], align 16