Skip to content

Commit

Permalink
Fix RemoveUnstructuredLoopExits when an exiting edge jumps out multip…
Browse files Browse the repository at this point in the history
…le levels of loops

Before doing any major surgery on an exit from loop L, ensure that if an
exit edge from L goes to block X, then X is in L's parent loop or no
loop at all.

Add test cases:
- a reduced test case where the exiting block does not dominate its own
  loop latch.
- a reduced test case where the exiting block is the latch for its own
  loop. This reproduces the assert triggered by the original HLSL.
- the original HLSL that triggered this bug fix.
- the intermediate module from the original HLSL, taken just before
  the attempt to remove unstructured loop exits.
  • Loading branch information
dneto0 committed May 31, 2024
1 parent 978d362 commit e62783e
Show file tree
Hide file tree
Showing 5 changed files with 518 additions and 0 deletions.
84 changes: 84 additions & 0 deletions lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,12 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

Expand Down Expand Up @@ -456,6 +458,79 @@ static SmallVector<Value_Info, 8> CollectExitValues(Value *new_exit_cond,
return exit_values;
}

// Ensures the branch from exiting_block to outside L escapes exactly one
// level of loop nesting, and does not immediately jump into an otherwise
// unrelated loop. Creates a downstream block as needed. If the exiting edge is
// critical, it will be split. Updates dominator tree and loop info. Returns
// true if any changes were made.
static bool EnsureSingleLevelExit(Loop *L, LoopInfo *LI, DominatorTree *DT,
BasicBlock *exiting_block) {
BasicBlock *exit_block = GetExitBlockForExitingBlock(L, exiting_block);

Loop *exit_loop = LI->getLoopFor(exit_block);
assert(L != exit_loop);

Loop *parent_loop = L->getParentLoop();
if (parent_loop != exit_loop) {
// Split the edge between the blocks, returning the newly created block.
BasicBlock *new_bb = SplitEdge(exiting_block, exit_block, DT, LI);
// The new block might be in the middle or at the end.
BasicBlock *middle_bb;
if (new_bb->getSingleSuccessor() == exit_block) {
middle_bb = new_bb;
} else {
middle_bb = exit_block;
exit_block = new_bb;
}

// What loop does middle_bb end up in? SplitEdge has these cases:
// If the edge was critical:
// if source block is not in a loop: ruled out already
// if dest block is not in a loop --> not in any loop.
// if going from outer loop to inner loop: ruled out already
// if going from inner loop to outer loop --> outer loop
// if loops unrelated by containment -> the parent loop of the
// destination block (which must be a loop header because we
// assume irreducible loops).
// If the edge was non-critcial:
// If the exit block only had one incominge edge --> same loop as
// destination block.
// otherwise the exiting block had a single successor.
// This is ruled out because the the exiting block ends with a
// conditional branch, and so has two successors.

// Move the middle_block to the parent loop, if it exists.
// If all goes well, the latch exit block will branch to it.
// If the algorithm bails early, then there is no harm in putting
// it in L's parent loop. At worst it will be an exiting block for
// the parent loop.
LI->removeBlock(middle_bb);
if (parent_loop) {
parent_loop->addBasicBlockToLoop(middle_bb, *LI);

// middle_bb block is now an exiting block, going from parent_loop to
// exit_loop, which we know are different. Make sure it ends in a
// in a conditional branch, as expected by the rest of the algorithm.
auto *br = cast<BranchInst>(middle_bb->getTerminator());
assert(!br->isConditional());
br->eraseFromParent();
BasicBlock *parent_latch = parent_loop->getLoopLatch();
BranchInst::Create(exit_block, parent_latch,
ConstantInt::getTrue(br->getContext()), middle_bb);
// Fix phis in parent_latch
for (Instruction &inst : *parent_latch) {
PHINode *phi = dyn_cast<PHINode>(&inst);
if (!phi)
break;
// We don't care about the values. The path is never taken.
phi->addIncoming(GetDefaultValue(phi->getType()), middle_bb);
}
}
return true;
}
return false;
}

// Restructures exiting_block so its work, including its exit branch, is moved
// to a block B that dominates the latch block. Let's call B the
// newly-exiting-block.
Expand All @@ -465,6 +540,15 @@ static bool RemoveUnstructuredLoopExitsIteration(BasicBlock *exiting_block,
Loop *L, LoopInfo *LI,
DominatorTree *DT) {
BasicBlock *latch = L->getLoopLatch();

if (EnsureSingleLevelExit(L, LI, DT, latch)) {
// Exit early so we're forced to recompute exit blocks.
return true;
}
if (EnsureSingleLevelExit(L, LI, DT, exiting_block)) {
return true;
}

BasicBlock *latch_exit = GetExitBlockForExitingBlock(L, latch);
BasicBlock *exit_block = GetExitBlockForExitingBlock(L, exiting_block);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
; RUN: opt %s -analyze -loops | FileCheck -check-prefix=LOOPBEFORE %s
; RUN: opt %s -dxil-remove-unstructured-loop-exits -o %t.bc
; RUN: opt %t.bc -S | FileCheck %s
; RUN: opt %t.bc -analyze -loops | FileCheck -check-prefix=LOOPAFTER %s

; The exiting edge from the latch block of the loop at depth 3 exits to the loop at depth 1.
; This reproduces the original bug.
;
; Loop exits are 'dedicated', one of the LoopSimplifyForm criteria.

; LOOPBEFORE: Loop at depth 1 containing: %header.1<header>,%header.2,%header.3,%if.3,%exiting.3,%endif.3,%latch.3,%latch.3.exit,%exit.3.to.2,%latch.2,%latch.2.exit,%latch.1<latch><exiting>
; LOOPBEFORE-NEXT: Loop at depth 2 containing: %header.2<header>,%header.3,%if.3,%exiting.3,%endif.3,%latch.3<exiting>,%exit.3.to.2,%latch.2<latch><exiting>
; LOOPBEFORE-NEXT: Loop at depth 3 containing: %header.3<header>,%if.3,%exiting.3<exiting>,%endif.3,%latch.3<latch><exiting>
; no more loops expected
; LOOPBEFORE-NOT: Loop at depth

; LOOPAFTER: Loop at depth 1 containing: %header.1<header>,%header.2,%header.3,%if.3,%exiting.3,%dx.struct_exit.new_exiting,%endif.3,%latch.3,%latch.3.exit,%0,%exit.3.to.2,%dx.struct_exit.new_exiting4,%latch.2,%latch.2.exit,%1,%latch.3.exit.split,%latch.1<latch><exiting>
; LOOPAFTER-NEXT: Loop at depth 2 containing: %header.2<header>,%header.3,%if.3,%exiting.3,%dx.struct_exit.new_exiting,%endif.3,%latch.3,%latch.3.exit,%0,%exit.3.to.2,%dx.struct_exit.new_exiting4<exiting>,%latch.2<latch><exiting>
; LOOPAFTER-NEXT: Loop at depth 3 containing: %header.3<header>,%if.3,%exiting.3,%dx.struct_exit.new_exiting<exiting>,%endif.3,%latch.3<latch><exiting>
; no more loops expected
; LOOPAFTER-NOT: Loop at depth


target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"


define void @main(i1 %cond) {
entry:
br label %header.1

header.1:
br label %header.2

header.2:
br label %header.3

header.3:
br label %if.3

if.3:
br i1 %cond, label %exiting.3, label %endif.3

exiting.3:
%x3val = add i32 0, 0
br i1 %cond, label %exit.3.to.2, label %endif.3

endif.3:
br label %latch.3

latch.3:
br i1 %cond, label %latch.3.exit, label %header.3

latch.3.exit:
br label %latch.1

latch.2:
%l2val = phi i32 [ %x3val, %exit.3.to.2 ]
br i1 %cond, label %latch.2.exit, label %header.2

latch.2.exit:
br label %latch.1

exit.3.to.2:
br label %latch.2

latch.1:
br i1 %cond, label %end, label %header.1

end:
ret void
}


; CHECK: define void @main(i1 %cond) {
; CHECK: entry:
; CHECK: br label %header.1

; CHECK: header.1:
; CHECK: br label %header.2

; CHECK: header.2:
; CHECK: br label %header.3

; CHECK: header.3:
; CHECK: br label %if.3

; CHECK: if.3:
; CHECK: br i1 %cond, label %exiting.3, label %dx.struct_exit.new_exiting

; CHECK: exiting.3:
; CHECK: %x3val = add i32 0, 0
; CHECK: br label %dx.struct_exit.new_exiting

; CHECK: dx.struct_exit.new_exiting:
; CHECK: %dx.struct_exit.prop1 = phi i1 [ %cond, %exiting.3 ], [ false, %if.3 ]
; CHECK: %dx.struct_exit.prop = phi i32 [ %x3val, %exiting.3 ], [ 0, %if.3 ]
; CHECK: br i1 %dx.struct_exit.prop1, label %latch.3.exit, label %endif.3

; CHECK: endif.3:
; CHECK: br label %latch.3

; CHECK: latch.3:
; CHECK: br i1 %cond, label %latch.3.exit, label %header.3

; CHECK: latch.3.exit:
; CHECK: %dx.struct_exit.exit_cond_lcssa = phi i1 [ %dx.struct_exit.prop1, %dx.struct_exit.new_exiting ], [ false, %latch.3 ]
; CHECK: %dx.struct_exit.val_lcssa = phi i32 [ %dx.struct_exit.prop, %dx.struct_exit.new_exiting ], [ 0, %latch.3 ]
; CHECK: br i1 %dx.struct_exit.exit_cond_lcssa, label %exit.3.to.2, label %0

; CHECK: <label>:0
; CHECK: br label %dx.struct_exit.new_exiting4

; CHECK: latch.3.exit.split:
; CHECK: br label %latch.1

; CHECK: dx.struct_exit.new_exiting4:
; CHECK: %dx.struct_exit.prop3 = phi i1 [ true, %0 ], [ false, %exit.3.to.2 ]
; CHECK: %l2val = phi i32 [ %x3val.lcssa, %exit.3.to.2 ], [ 0, %0 ]
; CHECK: br i1 %dx.struct_exit.prop3, label %latch.2.exit, label %latch.2

; CHECK: latch.2:
; CHECK: br i1 %cond, label %latch.2.exit, label %header.2

; CHECK: latch.2.exit:
; CHECK: %dx.struct_exit.exit_cond_lcssa6 = phi i1 [ %dx.struct_exit.prop3, %dx.struct_exit.new_exiting4 ], [ false, %latch.2 ]
; CHECK: br i1 %dx.struct_exit.exit_cond_lcssa6, label %latch.3.exit.split, label %1

; CHECK: <label>:1
; CHECK: br label %latch.1

; CHECK: exit.3.to.2:
; CHECK: %x3val.lcssa = phi i32 [ %dx.struct_exit.val_lcssa, %latch.3.exit ]
; CHECK: br label %dx.struct_exit.new_exiting4

; CHECK: latch.1:
; CHECK: br i1 %cond, label %end, label %header.1

; CHECK: end:
; CHECK: ret void
; CHECK: }
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
; RUN: opt %s -analyze -loops | FileCheck -check-prefix=LOOPBEFORE %s
; RUN: opt %s -dxil-remove-unstructured-loop-exits -o %t.bc

; This is the test case from ../../../../tools/clang/test/HLSLFileCheck/hlsl/control_flow/loops/multi_level_exit_regression.hlsl
; but just before the attempt to remove unstructured loop exits.
; There used to be a bug where the algorithm did not handle the branch that goes
; directly from: while.body.7.i.backege (in loop at depth 3)
; to: while.body.i.loopexit (in loop at depth 1)


; LOOPBEFORE: Loop at depth 1 containing: %while.body.i<header>,%while.body.7.i.preheader.preheader,%while.body.7.i.preheader,%while.body.i.loopexit.5,%if.end.i.preheader,%if.end.i,%if.end.15.i,%if.then.11.i,%while.body.7.i.backedge,%while.body.i.loopexit,%while.body.2.i.loopexit,%if.end.19.i.loopexit,%if.end.19.i,%while.end.22.i,%while.body.i.backedge<latch>
; LOOPBEFORE-NEXT: Loop at depth 2 containing: %while.body.7.i.preheader<header><exiting>,%if.end.i.preheader,%if.end.i,%if.end.15.i,%if.then.11.i,%while.body.7.i.backedge<exiting>,%while.body.2.i.loopexit<latch><exiting>
; LOOPBEFORE-NEXT: Loop at depth 3 containing: %if.end.i<header>,%if.end.15.i<exiting>,%if.then.11.i<exiting>,%while.body.7.i.backedge<latch><exiting>
; no more loops expected
; LOOPBEFORE-NOT: Loop at depth

; ModuleID = 'standalone.hlsl'
target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-ms-dx"


%struct.RWByteAddressBuffer = type { i32 }
%dx.types.Handle = type { i8* }
%dx.types.ResourceProperties = type { i32, i32 }

@"\01?g_1@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4
@llvm.used = appending global [1 x i8*] [i8* bitcast (%struct.RWByteAddressBuffer* @"\01?g_1@@3URWByteAddressBuffer@@A" to i8*)], section "llvm.metadata"

; Function Attrs: nounwind
define void @main() {
entry:
%0 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?g_1@@3URWByteAddressBuffer@@A"
br label %while.body.i

while.body.i.loopexit: ; preds = %while.body.7.i.backedge
br label %while.body.i.backedge

while.body.i.loopexit.5: ; preds = %while.body.7.i.preheader
br label %while.body.i.backedge

while.body.i: ; preds = %while.body.i.backedge, %entry
%l.i.0 = phi i32 [ 0, %entry ], [ %l.i.0.be, %while.body.i.backedge ]
%cmp.i = icmp ult i32 %l.i.0, 512
%cmp3.i.3 = icmp ne i32 %l.i.0, 9
br i1 %cmp3.i.3, label %while.body.7.i.preheader.preheader, label %if.end.19.i

while.body.7.i.preheader.preheader: ; preds = %while.body.i
br label %while.body.7.i.preheader

while.body.2.i.loopexit: ; preds = %if.then.11.i, %if.end.15.i
%cmp3.i = icmp ne i32 %l.i.0, 9
br i1 %cmp3.i, label %while.body.7.i.preheader, label %if.end.19.i.loopexit

while.body.7.i.preheader: ; preds = %while.body.7.i.preheader.preheader, %while.body.2.i.loopexit
br i1 %cmp.i, label %if.end.i.preheader, label %while.body.i.loopexit.5

if.end.i.preheader: ; preds = %while.body.7.i.preheader
br label %if.end.i

if.end.i: ; preds = %if.end.i.preheader, %while.body.7.i.backedge
br i1 true, label %if.then.11.i, label %if.end.15.i

if.then.11.i: ; preds = %if.end.i
br i1 %cmp.i, label %while.body.2.i.loopexit, label %while.body.7.i.backedge

while.body.7.i.backedge: ; preds = %if.then.11.i, %if.end.15.i
br i1 false, label %if.end.i, label %while.body.i.loopexit

if.end.15.i: ; preds = %if.end.i
br i1 %cmp.i, label %while.body.2.i.loopexit, label %while.body.7.i.backedge

if.end.19.i.loopexit: ; preds = %while.body.2.i.loopexit
br label %if.end.19.i

if.end.19.i: ; preds = %if.end.19.i.loopexit, %while.body.i
br i1 %cmp.i, label %while.end.22.i, label %while.body.i.backedge

while.end.22.i: ; preds = %if.end.19.i
%mul.i = mul i32 4, %l.i.0
%1 = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %0)
%2 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })
call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle %2, i32 %mul.i, i32 undef, i32 %l.i.0, i32 undef, i32 undef, i32 undef, i8 1, i32 4)
%add.i = add i32 %l.i.0, 1
br label %while.body.i.backedge

while.body.i.backedge: ; preds = %while.end.22.i, %if.end.19.i, %while.body.i.loopexit, %while.body.i.loopexit.5
%l.i.0.be = phi i32 [ %add.i, %while.end.22.i ], [ 0, %if.end.19.i ], [ 0, %while.body.i.loopexit ], [ 0, %while.body.i.loopexit.5 ]
br label %while.body.i
}

; Function Attrs: nounwind readnone
declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer)

; Function Attrs: nounwind readnone
declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)

; Function Attrs: nounwind
declare void @dx.op.rawBufferStore.i32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i8, i32)

; Function Attrs: nounwind readonly
declare %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32, %struct.RWByteAddressBuffer)

; Function Attrs: nounwind readnone
declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties)
Loading

0 comments on commit e62783e

Please sign in to comment.