$OpenBSD: patch-lib_Target_X86_X86FrameLowering_cpp,v 1.8 2020/08/05 06:49:48 jca Exp $

- Add RETGUARD to clang for amd64. This security mechanism uses per-function
  random cookies to protect access to function return instructions, with the
  effect that the integrity of the return address is protected, and function
  return instructions are harder to use in ROP gadgets.

  On function entry the return address is combined with a per-function random
  cookie and stored in the stack frame. The integrity of this value is verified
  before function return, and if this check fails, the program aborts. In this way
  RETGUARD is an improved stack protector, since the cookies are per-function. The
  verification routine is constructed such that the binary space immediately
  before each ret instruction is padded with int03 instructions, which makes these
  return instructions difficult to use in ROP gadgets. In the kernel, this has the
  effect of removing approximately 50% of total ROP gadgets, and 15% of unique
  ROP gadgets compared to the 6.3 release kernel. Function epilogues are
  essentially gadget free, leaving only the polymorphic gadgets that result from
  jumping into the instruction stream partway through other instructions. Work to
  remove these gadgets will continue through other mechanisms.

- Refactor retguard to make adding additional arches easier.

- implement -msave-args in clang/llvm, like the sun did for gcc

Index: lib/Target/X86/X86FrameLowering.cpp
--- lib/Target/X86/X86FrameLowering.cpp.orig
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -14,6 +14,7 @@
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
+#include "X86ReturnProtectorLowering.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
@@ -38,7 +39,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget
                                    MaybeAlign StackAlignOverride)
     : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                           STI.is64Bit() ? -8 : -4),
-      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
+      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() {
   // Cache a bunch of frame-related predicates for this subtarget.
   SlotSize = TRI->getSlotSize();
   Is64Bit = STI.is64Bit();
@@ -46,6 +47,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget
   // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
   Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
   StackPtr = TRI->getStackRegister();
+  SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
 }
 
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -89,7 +91,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF
           MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
           MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
           MFI.hasStackMap() || MFI.hasPatchPoint() ||
-          MFI.hasCopyImplyingStackAdjustment());
+          MFI.hasCopyImplyingStackAdjustment() ||
+          SaveArgs);
 }
 
 static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
@@ -872,6 +875,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasic
   MI->getOperand(3).setIsDead();
 }
 
+// FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+                                                const X86Subtarget &Subtarget) {
+  assert(Subtarget.is64Bit());
+
+  if (Subtarget.isCallingConvWin64(CallConv)) {
+    static const MCPhysReg GPR64ArgRegsWin64[] = {
+      X86::RCX, X86::RDX, X86::R8, X86::R9
+    };
+    return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+  }
+
+  static const MCPhysReg GPR64ArgRegs64Bit[] = {
+    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+  };
+  return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
 bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const {
   // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
   // clobbered by any interrupt handler.
@@ -1146,6 +1167,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &M
                                   nullptr, DwarfFramePtr));
     }
 
+    if (SaveArgs && !Fn.arg_empty()) {
+      ArrayRef<MCPhysReg> GPRs =
+        get64BitArgumentGPRs(Fn.getCallingConv(), STI);
+      unsigned arg_size = Fn.arg_size();
+      unsigned RI = 0;
+      int64_t SaveSize = 0;
+
+      if (Fn.hasStructRetAttr()) {
+        GPRs = GPRs.drop_front(1);
+        arg_size--;
+      }
+
+      for (MCPhysReg Reg : GPRs) {
+        if (++RI > arg_size)
+          break;
+
+        SaveSize += SlotSize;
+
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+          .addReg(Reg)
+          .setMIFlag(MachineInstr::FrameSetup);
+      }
+
+      // Realign the stack. PUSHes are the most space efficient.
+      while (SaveSize % getStackAlignment()) {
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+          .addReg(GPRs.front())
+          .setMIFlag(MachineInstr::FrameSetup);
+
+        SaveSize += SlotSize;
+      }
+
+      //dlg StackSize -= SaveSize;
+      //dlg MFI.setStackSize(StackSize);
+      X86FI->setSaveArgSize(SaveSize);
+    }
+
     if (NeedsWinFPO) {
       // .cv_fpo_setframe $FramePtr
       HasWinCFI = true;
@@ -1634,20 +1692,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
   }
   uint64_t SEHStackAllocAmt = NumBytes;
 
-  if (HasFP) {
-    // Pop EBP.
-    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
-            MachineFramePtr)
-        .setMIFlag(MachineInstr::FrameDestroy);
-    if (NeedsDwarfCFI) {
-      unsigned DwarfStackPtr =
-          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
-                                  nullptr, DwarfStackPtr, -SlotSize));
-      --MBBI;
-    }
-  }
-
   MachineBasicBlock::iterator FirstCSPop = MBBI;
   // Skip the callee-saved pop instructions.
   while (MBBI != MBB.begin()) {
@@ -1717,6 +1761,28 @@ void X86FrameLowering::emitEpilogue(MachineFunction &M
     --MBBI;
   }
 
+  if (HasFP) {
+    MBBI = Terminator;
+
+    if (X86FI->getSaveArgSize()) {
+      // LEAVE is effectively mov rbp,rsp; pop rbp
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64))
+        .setMIFlag(MachineInstr::FrameDestroy);
+    } else {
+      // Pop EBP.
+      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+              MachineFramePtr)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr =
+          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+      BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+                                  nullptr, DwarfStackPtr, -SlotSize));
+      --MBBI;
+    }
+  }
+
   // Windows unwinder will not invoke function's exception handler if IP is
   // either in prologue or in epilogue. This behavior causes a problem when a
   // call immediately precedes an epilogue, because the return address points
@@ -1814,6 +1880,8 @@ int X86FrameLowering::getFrameIndexReference(const Mac
            "FPDelta isn't aligned per the Win64 ABI!");
   }
 
+  if (FI >= 0)
+    Offset -= X86FI->getSaveArgSize();
 
   if (TRI->hasBasePointer(MF)) {
     assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
@@ -3218,4 +3286,8 @@ void X86FrameLowering::processFunctionBeforeFrameFinal
   addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                     UnwindHelpFI)
       .addImm(-2);
+}
+
+const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const {
+  return &RPL;
 }