Skip to content

Commit

Permalink
[X86] Fix bug in x86_intrcc with arg copy elision
Browse files Browse the repository at this point in the history
Summary:
Use a custom calling convention handler for interrupts instead of fixing
up the locations in LowerMemArgument. This way, the offsets are correct
when constructed and we don't need to account for them in as many
places.

Depends on D56883

Replaces D56275

Reviewers: craig.topper, phil-opp

Subscribers: hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D56944

llvm-svn: 354837
  • Loading branch information
rnk committed Feb 26, 2019
1 parent b4e16e6 commit 2f055f0
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 45 deletions.
40 changes: 40 additions & 0 deletions llvm/lib/Target/X86/X86CallingConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,5 +287,45 @@ static bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
return true;
}

/// X86 interrupt handlers can only take one or two stack arguments, but if
/// there are two arguments, they are in the opposite order from the standard
/// convention. Therefore, we have to look at the argument count up front before
/// allocating stack for each argument.
static bool CC_X86_Intr(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
const MachineFunction &MF = State.getMachineFunction();
size_t ArgCount = State.getMachineFunction().getFunction().arg_size();
bool Is64Bit = static_cast<const X86Subtarget &>(MF.getSubtarget()).is64Bit();
unsigned SlotSize = Is64Bit ? 8 : 4;
unsigned Offset;
if (ArgCount == 1 && ValNo == 0) {
// If we have one argument, the argument is five stack slots big, at fixed
// offset zero.
Offset = State.AllocateStack(5 * SlotSize, 4);
} else if (ArgCount == 2 && ValNo == 0) {
// If we have two arguments, the stack slot is *after* the error code
// argument. Pretend it doesn't consume stack space, and account for it when
// we assign the second argument.
Offset = SlotSize;
} else if (ArgCount == 2 && ValNo == 1) {
// If this is the second of two arguments, it must be the error code. It
// appears first on the stack, and is then followed by the five slot
// interrupt struct.
Offset = 0;
(void)State.AllocateStack(6 * SlotSize, 4);
} else {
report_fatal_error("unsupported x86 interrupt prototype");
}

// FIXME: This should be accounted for in
// X86FrameLowering::getFrameIndexReference, not here.
if (Is64Bit && ArgCount == 2)
Offset += SlotSize;

State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}

// Provides entry points of CC_X86 and RetCC_X86.
#include "X86GenCallingConv.inc"
12 changes: 2 additions & 10 deletions llvm/lib/Target/X86/X86CallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -985,14 +985,6 @@ def CC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<CC_X86_32_C>
]>;

def CC_X86_32_Intr : CallingConv<[
CCAssignToStack<4, 4>
]>;

def CC_X86_64_Intr : CallingConv<[
CCAssignToStack<8, 8>
]>;

//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
Expand All @@ -1001,7 +993,7 @@ def CC_X86_64_Intr : CallingConv<[
def CC_X86_32 : CallingConv<[
// X86_INTR calling convention is valid in MCU target and should override the
// MCU calling convention. Thus, this should be checked before isTargetMCU().
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
Expand Down Expand Up @@ -1029,7 +1021,7 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
CCIfCC<"CallingConv::X86_INTR", CCCustom<"CC_X86_Intr">>,

// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1773,6 +1773,15 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;

// In an x86 interrupt, remove the offset we added to account for the return
// address from any stack object allocated in the caller's frame. Interrupts
// do not have a standard return address. Fixed objects in the current frame,
// such as SSE register spills, should not get this treatment.
if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
Offset >= 0) {
Offset += getOffsetOfLocalArea();
}

if (IsWin64Prologue) {
assert(!MFI.hasCalls() || (StackSize % 16) == 8);

Expand Down
33 changes: 0 additions & 33 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2976,22 +2976,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
else
ValVT = VA.getValVT();

// Calculate SP offset of interrupt parameter, re-arrange the slot normally
// taken by a return address.
int Offset = 0;
if (CallConv == CallingConv::X86_INTR) {
// X86 interrupts may take one or two arguments.
// On the stack there will be no return address as in regular call.
// Offset of last argument need to be set to -4/-8 bytes.
// Where offset of the first argument out of two, should be set to 0 bytes.
Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
if (Subtarget.is64Bit() && Ins.size() == 2) {
// The stack pointer needs to be realigned for 64 bit handlers with error
// code, so the argument offset changes by 8 bytes.
Offset += 8;
}
}

// FIXME: For now, all byval parameter objects are marked mutable. This can be
// changed with more analysis.
// In case of tail call optimization mark all arguments mutable. Since they
Expand All @@ -3004,10 +2988,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// can be improved with deeper analysis.
int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
/*isAliased=*/true);
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}
return DAG.getFrameIndex(FI, PtrVT);
}

Expand Down Expand Up @@ -3062,11 +3042,6 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
MFI.setObjectSExt(FI, true);
}

// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
MFI.setObjectOffset(FI, Offset);
}

SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getLoad(
ValVT, dl, Chain, FIN,
Expand Down Expand Up @@ -3156,14 +3131,6 @@ SDValue X86TargetLowering::LowerFormalArguments(
!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe");

if (CallConv == CallingConv::X86_INTR) {
bool isLegal = Ins.size() == 1 ||
(Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
(!Is64Bit && Ins[1].VT == MVT::i32)));
if (!isLegal)
report_fatal_error("X86 interrupts may take one or two arguments");
}

// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
Expand Down
68 changes: 67 additions & 1 deletion llvm/test/CodeGen/X86/x86-32-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

%struct.interrupt_frame = type { i32, i32, i32, i32, i32 }

@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i32)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
@sink_address = global i32* null
@sink_i32 = global i32 0


; Spills eax, putting original esp at +4.
; No stack adjustment if declared with no error code
Expand Down Expand Up @@ -93,3 +95,67 @@ entry:
store x86_fp80 %add, x86_fp80* @f80, align 4
ret void
}

; Use a frame pointer to check the offsets. No return address, arguments start
; at EBP+4.
define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
; CHECK-LABEL: test_fp_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK: cld
; CHECK-DAG: leal 4(%ebp), %[[R1:[^ ]*]]
; CHECK-DAG: leal 20(%ebp), %[[R2:[^ ]*]]
; CHECK: movl %[[R1]], sink_address
; CHECK: movl %[[R2]], sink_address
; CHECK: popl %ebp
; CHECK: iretl
entry:
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
store volatile i32* %arrayidx, i32** @sink_address
store volatile i32* %arrayidx2, i32** @sink_address
ret void
}

; The error code is between EBP and the interrupt_frame.
define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i32 %err) #0 {
; CHECK-LABEL: test_fp_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK: cld
; CHECK-DAG: movl 4(%ebp), %[[R3:[^ ]*]]
; CHECK-DAG: leal 8(%ebp), %[[R1:[^ ]*]]
; CHECK-DAG: leal 24(%ebp), %[[R2:[^ ]*]]
; CHECK: movl %[[R1]], sink_address
; CHECK: movl %[[R2]], sink_address
; CHECK: movl %[[R3]], sink_i32
; CHECK: popl %ebp
; CHECK: iretl
entry:
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 0
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i32 0, i32 4
store volatile i32* %arrayidx, i32** @sink_address
store volatile i32* %arrayidx2, i32** @sink_address
store volatile i32 %err, i32* @sink_i32
ret void
}

; Test argument copy elision when copied to a local alloca.
define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i32 %err) #0 {
; CHECK-LABEL: test_copy_elide:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: movl %esp, %ebp
; CHECK: cld
; CHECK: leal 4(%ebp), %[[R1:[^ ]*]]
; CHECK: movl %[[R1]], sink_address
entry:
%err.addr = alloca i32, align 4
store i32 %err, i32* %err.addr, align 4
store volatile i32* %err.addr, i32** @sink_address
ret void
}

attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
75 changes: 74 additions & 1 deletion llvm/test/CodeGen/X86/x86-64-intrcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

%struct.interrupt_frame = type { i64, i64, i64, i64, i64 }

@llvm.used = appending global [4 x i8*] [i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_no_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_ecode to i8*), i8* bitcast (void (%struct.interrupt_frame*, i64)* @test_isr_clobbers to i8*), i8* bitcast (void (%struct.interrupt_frame*)* @test_isr_x87 to i8*)], section "llvm.metadata"
@sink_address = global i64* null
@sink_i32 = global i64 0

; Spills rax, putting original esp at +8.
; No stack adjustment if declared with no error code
Expand Down Expand Up @@ -105,3 +106,75 @@ entry:
store x86_fp80 %add, x86_fp80* @f80, align 4
ret void
}

; Use a frame pointer to check the offsets. No return address, arguments start
; at RBP+4.
define dso_local x86_intrcc void @test_fp_1(%struct.interrupt_frame* %p) #0 {
; CHECK-LABEL: test_fp_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: leaq 8(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 40(%rbp), %[[R2:[^ ]*]]
; CHECK: movq %[[R1]], sink_address
; CHECK: movq %[[R2]], sink_address
; CHECK: popq %rbp
; CHECK: iretq
entry:
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
store volatile i64* %arrayidx, i64** @sink_address
store volatile i64* %arrayidx2, i64** @sink_address
ret void
}

; The error code is between RBP and the interrupt_frame.
define dso_local x86_intrcc void @test_fp_2(%struct.interrupt_frame* %p, i64 %err) #0 {
; CHECK-LABEL: test_fp_2:
; CHECK: # %bb.0: # %entry
; This RAX push is just to align the stack.
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK-DAG: movq 16(%rbp), %[[R3:[^ ]*]]
; CHECK-DAG: leaq 24(%rbp), %[[R1:[^ ]*]]
; CHECK-DAG: leaq 56(%rbp), %[[R2:[^ ]*]]
; CHECK: movq %[[R1]], sink_address(%rip)
; CHECK: movq %[[R2]], sink_address(%rip)
; CHECK: movq %[[R3]], sink_i32(%rip)
; CHECK: popq %rbp
; Pop off both the error code and the 8 byte alignment adjustment from the
; prologue.
; CHECK: addq $16, %rsp
; CHECK: iretq
entry:
%arrayidx = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 0
%arrayidx2 = getelementptr inbounds %struct.interrupt_frame, %struct.interrupt_frame* %p, i64 0, i32 4
store volatile i64* %arrayidx, i64** @sink_address
store volatile i64* %arrayidx2, i64** @sink_address
store volatile i64 %err, i64* @sink_i32
ret void
}

; Test argument copy elision when copied to a local alloca.
define x86_intrcc void @test_copy_elide(%struct.interrupt_frame* %frame, i64 %err) #0 {
; CHECK-LABEL: test_copy_elide:
; CHECK: # %bb.0: # %entry
; This RAX push is just to align the stack.
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: movq %rsp, %rbp
; CHECK: cld
; CHECK: leaq 16(%rbp), %[[R1:[^ ]*]]
; CHECK: movq %[[R1]], sink_address(%rip)
entry:
%err.addr = alloca i64, align 4
store i64 %err, i64* %err.addr, align 4
store volatile i64* %err.addr, i64** @sink_address
ret void
}


attributes #0 = { nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }

0 comments on commit 2f055f0

Please sign in to comment.