diff options
Diffstat (limited to 'src/common/x64')
-rw-r--r-- | src/common/x64/abi.cpp | 127 | ||||
-rw-r--r-- | src/common/x64/abi.h | 15 | ||||
-rw-r--r-- | src/common/x64/cpu_detect.cpp | 92 | ||||
-rw-r--r-- | src/common/x64/emitter.cpp | 2612 | ||||
-rw-r--r-- | src/common/x64/emitter.h | 601 |
5 files changed, 2093 insertions, 1354 deletions
diff --git a/src/common/x64/abi.cpp b/src/common/x64/abi.cpp index 955eb86ce..504b9c940 100644 --- a/src/common/x64/abi.cpp +++ b/src/common/x64/abi.cpp @@ -22,7 +22,8 @@ using namespace Gen; // Shared code between Win64 and Unix64 -void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { +void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, + size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp) { size_t shadow = 0; #if defined(_WIN32) shadow = 0x20; @@ -49,17 +50,19 @@ void XEmitter::ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_ *xmm_offsetp = subtraction - xmm_base_subtraction; } -size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { +size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, + size_t needed_frame_size) { size_t shadow, subtraction, xmm_offset; - ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, + &xmm_offset); - for (int r : mask & ABI_ALL_GPRS) + for (int r : mask& ABI_ALL_GPRS) PUSH((X64Reg)r); if (subtraction) SUB(64, R(RSP), subtraction >= 0x80 ? Imm32((u32)subtraction) : Imm8((u8)subtraction)); - for (int x : mask & ABI_ALL_FPRS) { + for (int x : mask& ABI_ALL_FPRS) { MOVAPD(MDisp(RSP, (int)xmm_offset), (X64Reg)(x - 16)); xmm_offset += 16; } @@ -67,12 +70,14 @@ size_t XEmitter::ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_align return shadow; } -void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size) { +void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, + size_t needed_frame_size) { size_t shadow, subtraction, xmm_offset; - ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, &xmm_offset); + ABI_CalculateFrameSize(mask, rsp_alignment, needed_frame_size, &shadow, &subtraction, + &xmm_offset); - for (int x : mask & ABI_ALL_FPRS) { - MOVAPD((X64Reg) (x - 16), MDisp(RSP, (int)xmm_offset)); + for (int x : mask& ABI_ALL_FPRS) { + MOVAPD((X64Reg)(x - 16), MDisp(RSP, (int)xmm_offset)); xmm_offset += 16; } @@ -86,10 +91,9 @@ void XEmitter::ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignmen } // Common functions -void XEmitter::ABI_CallFunction(const void *func) { +void XEmitter::ABI_CallFunction(const void* func) { u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -98,11 +102,10 @@ void XEmitter::ABI_CallFunction(const void *func) { } } -void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { +void XEmitter::ABI_CallFunctionC16(const void* func, u16 param1) { MOV(32, R(ABI_PARAM1), Imm32((u32)param1)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -111,25 +114,23 @@ void XEmitter::ABI_CallFunctionC16(const void *func, u16 param1) { } } -void XEmitter::ABI_CallFunctionCC16(const void *func, u32 param1, u16 param2) { +void XEmitter::ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2) { MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32((u32)param2)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { - // Far call - MOV(64, R(RAX), ImmPtr(func)); - CALLptr(R(RAX)); + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { + // Far call + MOV(64, R(RAX), ImmPtr(func)); + CALLptr(R(RAX)); } else { CALL(func); } } -void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { +void XEmitter::ABI_CallFunctionC(const void* func, u32 param1) { MOV(32, R(ABI_PARAM1), Imm32(param1)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -138,12 +139,11 @@ void XEmitter::ABI_CallFunctionC(const void *func, u32 param1) { } } -void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { +void XEmitter::ABI_CallFunctionCC(const void* func, u32 param1, u32 param2) { MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -152,13 +152,12 @@ void XEmitter::ABI_CallFunctionCC(const void *func, u32 param1, u32 param2) { } } -void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 param3) { +void XEmitter::ABI_CallFunctionCCC(const void* func, u32 param1, u32 param2, u32 param3) { MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -167,13 +166,12 @@ void XEmitter::ABI_CallFunctionCCC(const void *func, u32 param1, u32 param2, u32 } } -void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, void *param3) { +void XEmitter::ABI_CallFunctionCCP(const void* func, u32 param1, u32 param2, void* param3) { MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, R(ABI_PARAM3), ImmPtr(param3)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -182,14 +180,14 @@ void XEmitter::ABI_CallFunctionCCP(const void *func, u32 param1, u32 param2, voi } } -void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u32 param3, void *param4) { +void XEmitter::ABI_CallFunctionCCCP(const void* func, u32 param1, u32 param2, u32 param3, + void* param4) { MOV(32, R(ABI_PARAM1), Imm32(param1)); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); MOV(64, R(ABI_PARAM4), ImmPtr(param4)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -198,11 +196,10 @@ void XEmitter::ABI_CallFunctionCCCP(const void *func, u32 param1, u32 param2, u3 } } -void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { +void XEmitter::ABI_CallFunctionP(const void* func, void* param1) { MOV(64, R(ABI_PARAM1), ImmPtr(param1)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -211,13 +208,12 @@ void XEmitter::ABI_CallFunctionP(const void *func, void *param1) { } } -void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpArg &arg2) { +void XEmitter::ABI_CallFunctionPA(const void* func, void* param1, const Gen::OpArg& arg2) { MOV(64, R(ABI_PARAM1), ImmPtr(param1)); if (!arg2.IsSimpleReg(ABI_PARAM2)) MOV(32, R(ABI_PARAM2), arg2); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -226,15 +222,15 @@ void XEmitter::ABI_CallFunctionPA(const void *func, void *param1, const Gen::OpA } } -void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::OpArg &arg2, const Gen::OpArg &arg3) { +void XEmitter::ABI_CallFunctionPAA(const void* func, void* param1, const Gen::OpArg& arg2, + const Gen::OpArg& arg3) { MOV(64, R(ABI_PARAM1), ImmPtr(param1)); if (!arg2.IsSimpleReg(ABI_PARAM2)) MOV(32, R(ABI_PARAM2), arg2); if (!arg3.IsSimpleReg(ABI_PARAM3)) MOV(32, R(ABI_PARAM3), arg3); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -243,13 +239,12 @@ void XEmitter::ABI_CallFunctionPAA(const void *func, void *param1, const Gen::Op } } -void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, u32 param3) { +void XEmitter::ABI_CallFunctionPPC(const void* func, void* param1, void* param2, u32 param3) { MOV(64, R(ABI_PARAM1), ImmPtr(param1)); MOV(64, R(ABI_PARAM2), ImmPtr(param2)); MOV(32, R(ABI_PARAM3), Imm32(param3)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -259,12 +254,11 @@ void XEmitter::ABI_CallFunctionPPC(const void *func, void *param1, void *param2, } // Pass a register as a parameter. -void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { +void XEmitter::ABI_CallFunctionR(const void* func, X64Reg reg1) { if (reg1 != ABI_PARAM1) MOV(32, R(ABI_PARAM1), R(reg1)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -274,7 +268,7 @@ void XEmitter::ABI_CallFunctionR(const void *func, X64Reg reg1) { } // Pass two registers as parameters. -void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { +void XEmitter::ABI_CallFunctionRR(const void* func, X64Reg reg1, X64Reg reg2) { if (reg2 != ABI_PARAM1) { if (reg1 != ABI_PARAM1) MOV(64, R(ABI_PARAM1), R(reg1)); @@ -287,8 +281,7 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { MOV(64, R(ABI_PARAM1), R(reg1)); } u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -297,14 +290,12 @@ void XEmitter::ABI_CallFunctionRR(const void *func, X64Reg reg1, X64Reg reg2) { } } -void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 param2) -{ +void XEmitter::ABI_CallFunctionAC(const void* func, const Gen::OpArg& arg1, u32 param2) { if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -313,15 +304,14 @@ void XEmitter::ABI_CallFunctionAC(const void *func, const Gen::OpArg &arg1, u32 } } -void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 param2, u32 param3) -{ +void XEmitter::ABI_CallFunctionACC(const void* func, const Gen::OpArg& arg1, u32 param2, + u32 param3) { if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); MOV(32, R(ABI_PARAM2), Imm32(param2)); MOV(64, R(ABI_PARAM3), Imm64(param3)); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -330,13 +320,11 @@ void XEmitter::ABI_CallFunctionACC(const void *func, const Gen::OpArg &arg1, u32 } } -void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) -{ +void XEmitter::ABI_CallFunctionA(const void* func, const Gen::OpArg& arg1) { if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); @@ -345,15 +333,14 @@ void XEmitter::ABI_CallFunctionA(const void *func, const Gen::OpArg &arg1) } } -void XEmitter::ABI_CallFunctionAA(const void *func, const Gen::OpArg &arg1, const Gen::OpArg &arg2) -{ +void XEmitter::ABI_CallFunctionAA(const void* func, const Gen::OpArg& arg1, + const Gen::OpArg& arg2) { if (!arg1.IsSimpleReg(ABI_PARAM1)) MOV(32, R(ABI_PARAM1), arg1); if (!arg2.IsSimpleReg(ABI_PARAM2)) MOV(32, R(ABI_PARAM2), arg2); u64 distance = u64(func) - (u64(code) + 5); - if (distance >= 0x0000000080000000ULL - && distance < 0xFFFFFFFF80000000ULL) { + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { // Far call MOV(64, R(RAX), ImmPtr(func)); CALLptr(R(RAX)); diff --git a/src/common/x64/abi.h b/src/common/x64/abi.h index de6d62fdd..eaaf81d89 100644 --- a/src/common/x64/abi.h +++ b/src/common/x64/abi.h @@ -12,7 +12,8 @@ // Windows 64-bit // * 4-reg "fastcall" variant, very new-skool stack handling -// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself calls_ +// * Callee moves stack pointer, to make room for shadow regs for the biggest function _it itself +// calls_ // * Parameters passed in RCX, RDX, ... further parameters are MOVed into the allocated stack space. // Scratch: RAX RCX RDX R8 R9 R10 R11 // Callee-save: RBX RSI RDI RBP R12 R13 R14 R15 @@ -35,10 +36,10 @@ #define ABI_PARAM4 R9 // xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers. -#define ABI_ALL_CALLER_SAVED \ - (BitSet32 { RAX, RCX, RDX, R8, R9, R10, R11, \ - XMM0+16, XMM1+16, XMM2+16, XMM3+16, XMM4+16, XMM5+16 }) -#else //64-bit Unix / OS X +#define ABI_ALL_CALLER_SAVED \ + (BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \ + XMM4 + 16, XMM5 + 16}) +#else // 64-bit Unix / OS X #define ABI_PARAM1 RDI #define ABI_PARAM2 RSI @@ -49,9 +50,7 @@ // TODO: Avoid pushing all 16 XMM registers when possible. Most functions we call probably // don't actually clobber them. -#define ABI_ALL_CALLER_SAVED \ - (BitSet32 { RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11 } | \ - ABI_ALL_FPRS) +#define ABI_ALL_CALLER_SAVED (BitSet32{RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11} | ABI_ALL_FPRS) #endif // WIN32 #define ABI_ALL_CALLEE_SAVED (~ABI_ALL_CALLER_SAVED) diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index d9c430c67..19f1a4030 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -15,8 +15,8 @@ namespace Common { #ifndef _MSC_VER #ifdef __FreeBSD__ -#include <sys/types.h> #include <machine/cpufunc.h> +#include <sys/types.h> #endif static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { @@ -26,15 +26,9 @@ static inline void __cpuidex(int info[4], int function_id, int subfunction_id) { #else info[0] = function_id; // eax info[2] = subfunction_id; // ecx - __asm__( - "cpuid" - : "=a" (info[0]), - "=b" (info[1]), - "=c" (info[2]), - "=d" (info[3]) - : "a" (function_id), - "c" (subfunction_id) - ); + __asm__("cpuid" + : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) + : "a"(function_id), "c"(subfunction_id)); #endif } @@ -88,14 +82,22 @@ static CPUCaps Detect() { if (max_std_fn >= 1) { __cpuid(cpu_id, 0x00000001); - if ((cpu_id[3] >> 25) & 1) caps.sse = true; - if ((cpu_id[3] >> 26) & 1) caps.sse2 = true; - if ((cpu_id[2]) & 1) caps.sse3 = true; - if ((cpu_id[2] >> 9) & 1) caps.ssse3 = true; - if ((cpu_id[2] >> 19) & 1) caps.sse4_1 = true; - if ((cpu_id[2] >> 20) & 1) caps.sse4_2 = true; - if ((cpu_id[2] >> 22) & 1) caps.movbe = true; - if ((cpu_id[2] >> 25) & 1) caps.aes = true; + if ((cpu_id[3] >> 25) & 1) + caps.sse = true; + if ((cpu_id[3] >> 26) & 1) + caps.sse2 = true; + if ((cpu_id[2]) & 1) + caps.sse3 = true; + if ((cpu_id[2] >> 9) & 1) + caps.ssse3 = true; + if ((cpu_id[2] >> 19) & 1) + caps.sse4_1 = true; + if ((cpu_id[2] >> 20) & 1) + caps.sse4_2 = true; + if ((cpu_id[2] >> 22) & 1) + caps.movbe = true; + if ((cpu_id[2] >> 25) & 1) + caps.aes = true; if ((cpu_id[3] >> 24) & 1) { caps.fxsave_fxrstor = true; @@ -140,10 +142,14 @@ static CPUCaps Detect() { if (max_ex_fn >= 0x80000001) { // Check for more features __cpuid(cpu_id, 0x80000001); - if (cpu_id[2] & 1) caps.lahf_sahf_64 = true; - if ((cpu_id[2] >> 5) & 1) caps.lzcnt = true; - if ((cpu_id[2] >> 16) & 1) caps.fma4 = true; - if ((cpu_id[3] >> 29) & 1) caps.long_mode = true; + if (cpu_id[2] & 1) + caps.lahf_sahf_64 = true; + if ((cpu_id[2] >> 5) & 1) + caps.lzcnt = true; + if ((cpu_id[2] >> 16) & 1) + caps.fma4 = true; + if ((cpu_id[3] >> 29) & 1) + caps.long_mode = true; } return caps; @@ -162,24 +168,38 @@ std::string GetCPUCapsString() { sum += caps.brand_string; sum += ")"; - if (caps.sse) sum += ", SSE"; + if (caps.sse) + sum += ", SSE"; if (caps.sse2) { sum += ", SSE2"; - if (!caps.flush_to_zero) sum += " (without DAZ)"; + if (!caps.flush_to_zero) + sum += " (without DAZ)"; } - if (caps.sse3) sum += ", SSE3"; - if (caps.ssse3) sum += ", SSSE3"; - if (caps.sse4_1) sum += ", SSE4.1"; - if (caps.sse4_2) sum += ", SSE4.2"; - if (caps.avx) sum += ", AVX"; - if (caps.avx2) sum += ", AVX2"; - if (caps.bmi1) sum += ", BMI1"; - if (caps.bmi2) sum += ", BMI2"; - if (caps.fma) sum += ", FMA"; - if (caps.aes) sum += ", AES"; - if (caps.movbe) sum += ", MOVBE"; - if (caps.long_mode) sum += ", 64-bit support"; + if (caps.sse3) + sum += ", SSE3"; + if (caps.ssse3) + sum += ", SSSE3"; + if (caps.sse4_1) + sum += ", SSE4.1"; + if (caps.sse4_2) + sum += ", SSE4.2"; + if (caps.avx) + sum += ", AVX"; + if (caps.avx2) + sum += ", AVX2"; + if (caps.bmi1) + sum += ", BMI1"; + if (caps.bmi2) + sum += ", BMI2"; + if (caps.fma) + sum += ", FMA"; + if (caps.aes) + sum += ", AES"; + if (caps.movbe) + sum += ", MOVBE"; + if (caps.long_mode) + sum += ", 64-bit support"; return sum; } diff --git a/src/common/x64/emitter.cpp b/src/common/x64/emitter.cpp index 5662f7f86..1a9fd6a6b 100644 --- a/src/common/x64/emitter.cpp +++ b/src/common/x64/emitter.cpp @@ -26,179 +26,162 @@ #include "cpu_detect.h" #include "emitter.h" -namespace Gen -{ +namespace Gen { -struct NormalOpDef -{ +struct NormalOpDef { u8 toRm8, toRm32, fromRm8, fromRm32, imm8, imm32, simm8, eaximm8, eaximm32, ext; }; // 0xCC is code for invalid combination of immediates -static const NormalOpDef normalops[11] = -{ - {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, //ADD - {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, //ADC +static const NormalOpDef normalops[11] = { + {0x00, 0x01, 0x02, 0x03, 0x80, 0x81, 0x83, 0x04, 0x05, 0}, // ADD + {0x10, 0x11, 0x12, 0x13, 0x80, 0x81, 0x83, 0x14, 0x15, 2}, // ADC - {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, //SUB - {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, //SBB + {0x28, 0x29, 0x2A, 0x2B, 0x80, 0x81, 0x83, 0x2C, 0x2D, 5}, // SUB + {0x18, 0x19, 0x1A, 0x1B, 0x80, 0x81, 0x83, 0x1C, 0x1D, 3}, // SBB - {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, //AND - {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, //OR + {0x20, 0x21, 0x22, 0x23, 0x80, 0x81, 0x83, 0x24, 0x25, 4}, // AND + {0x08, 0x09, 0x0A, 0x0B, 0x80, 0x81, 0x83, 0x0C, 0x0D, 1}, // OR - {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, //XOR - {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, //MOV + {0x30, 0x31, 0x32, 0x33, 0x80, 0x81, 0x83, 0x34, 0x35, 6}, // XOR + {0x88, 0x89, 0x8A, 0x8B, 0xC6, 0xC7, 0xCC, 0xCC, 0xCC, 0}, // MOV - {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, //TEST (to == from) - {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, //CMP + {0x84, 0x85, 0x84, 0x85, 0xF6, 0xF7, 0xCC, 0xA8, 0xA9, 0}, // TEST (to == from) + {0x38, 0x39, 0x3A, 0x3B, 0x80, 0x81, 0x83, 0x3C, 0x3D, 7}, // CMP - {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, //XCHG + {0x86, 0x87, 0x86, 0x87, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 7}, // XCHG }; -enum NormalSSEOps -{ - sseCMP = 0xC2, - sseADD = 0x58, //ADD - sseSUB = 0x5C, //SUB - sseAND = 0x54, //AND - sseANDN = 0x55, //ANDN - sseOR = 0x56, - sseXOR = 0x57, - sseMUL = 0x59, //MUL - sseDIV = 0x5E, //DIV - sseMIN = 0x5D, //MIN - sseMAX = 0x5F, //MAX - sseCOMIS = 0x2F, //COMIS - sseUCOMIS = 0x2E, //UCOMIS - sseSQRT = 0x51, //SQRT - sseRSQRT = 0x52, //RSQRT (NO DOUBLE PRECISION!!!) - sseRCP = 0x53, //RCP - sseMOVAPfromRM = 0x28, //MOVAP from RM - sseMOVAPtoRM = 0x29, //MOVAP to RM - sseMOVUPfromRM = 0x10, //MOVUP from RM - sseMOVUPtoRM = 0x11, //MOVUP to RM - sseMOVLPfromRM= 0x12, - sseMOVLPtoRM = 0x13, - sseMOVHPfromRM= 0x16, - sseMOVHPtoRM = 0x17, - sseMOVHLPS = 0x12, - sseMOVLHPS = 0x16, +enum NormalSSEOps { + sseCMP = 0xC2, + sseADD = 0x58, // ADD + sseSUB = 0x5C, // SUB + sseAND = 0x54, // AND + sseANDN = 0x55, // ANDN + sseOR = 0x56, + sseXOR = 0x57, + sseMUL = 0x59, // MUL + sseDIV = 0x5E, // DIV + sseMIN = 0x5D, // MIN + sseMAX = 0x5F, // MAX + sseCOMIS = 0x2F, // COMIS + sseUCOMIS = 0x2E, // UCOMIS + sseSQRT = 0x51, // SQRT + sseRSQRT = 0x52, // RSQRT (NO DOUBLE PRECISION!!!) + sseRCP = 0x53, // RCP + sseMOVAPfromRM = 0x28, // MOVAP from RM + sseMOVAPtoRM = 0x29, // MOVAP to RM + sseMOVUPfromRM = 0x10, // MOVUP from RM + sseMOVUPtoRM = 0x11, // MOVUP to RM + sseMOVLPfromRM = 0x12, + sseMOVLPtoRM = 0x13, + sseMOVHPfromRM = 0x16, + sseMOVHPtoRM = 0x17, + sseMOVHLPS = 0x12, + sseMOVLHPS = 0x16, sseMOVDQfromRM = 0x6F, - sseMOVDQtoRM = 0x7F, - sseMASKMOVDQU = 0xF7, - sseLDDQU = 0xF0, - sseSHUF = 0xC6, - sseMOVNTDQ = 0xE7, - sseMOVNTP = 0x2B, - sseHADD = 0x7C, + sseMOVDQtoRM = 0x7F, + sseMASKMOVDQU = 0xF7, + sseLDDQU = 0xF0, + sseSHUF = 0xC6, + sseMOVNTDQ = 0xE7, + sseMOVNTP = 0x2B, + sseHADD = 0x7C, }; - -void XEmitter::SetCodePtr(u8 *ptr) -{ +void XEmitter::SetCodePtr(u8* ptr) { code = ptr; } -const u8 *XEmitter::GetCodePtr() const -{ +const u8* XEmitter::GetCodePtr() const { return code; } -u8 *XEmitter::GetWritableCodePtr() -{ +u8* XEmitter::GetWritableCodePtr() { return code; } -void XEmitter::Write8(u8 value) -{ +void XEmitter::Write8(u8 value) { *code++ = value; } -void XEmitter::Write16(u16 value) -{ +void XEmitter::Write16(u16 value) { std::memcpy(code, &value, sizeof(u16)); code += sizeof(u16); } -void XEmitter::Write32(u32 value) -{ +void XEmitter::Write32(u32 value) { std::memcpy(code, &value, sizeof(u32)); code += sizeof(u32); } -void XEmitter::Write64(u64 value) -{ +void XEmitter::Write64(u64 value) { std::memcpy(code, &value, sizeof(u64)); code += sizeof(u64); } -void XEmitter::ReserveCodeSpace(int bytes) -{ +void XEmitter::ReserveCodeSpace(int bytes) { for (int i = 0; i < bytes; i++) *code++ = 0xCC; } -const u8 *XEmitter::AlignCode4() -{ +const u8* XEmitter::AlignCode4() { int c = int((u64)code & 3); if (c) - ReserveCodeSpace(4-c); + ReserveCodeSpace(4 - c); return code; } -const u8 *XEmitter::AlignCode16() -{ +const u8* XEmitter::AlignCode16() { int c = int((u64)code & 15); if (c) - ReserveCodeSpace(16-c); + ReserveCodeSpace(16 - c); return code; } -const u8 *XEmitter::AlignCodePage() -{ +const u8* XEmitter::AlignCodePage() { int c = int((u64)code & 4095); if (c) - ReserveCodeSpace(4096-c); + ReserveCodeSpace(4096 - c); return code; } // This operation modifies flags; check to see the flags are locked. // If the flags are locked, we should immediately and loudly fail before // causing a subtle JIT bug. -void XEmitter::CheckFlags() -{ +void XEmitter::CheckFlags() { ASSERT_MSG(!flags_locked, "Attempt to modify flags while flags locked!"); } -void XEmitter::WriteModRM(int mod, int reg, int rm) -{ +void XEmitter::WriteModRM(int mod, int reg, int rm) { Write8((u8)((mod << 6) | ((reg & 7) << 3) | (rm & 7))); } -void XEmitter::WriteSIB(int scale, int index, int base) -{ +void XEmitter::WriteSIB(int scale, int index, int base) { Write8((u8)((scale << 6) | ((index & 7) << 3) | (base & 7))); } -void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const -{ - if (customOp == -1) customOp = operandReg; +void OpArg::WriteRex(XEmitter* emit, int opBits, int bits, int customOp) const { + if (customOp == -1) + customOp = operandReg; #ifdef ARCHITECTURE_x86_64 u8 op = 0x40; // REX.W (whether operation is a 64-bit operation) - if (opBits == 64) op |= 8; + if (opBits == 64) + op |= 8; // REX.R (whether ModR/M reg field refers to R8-R15. - if (customOp & 8) op |= 4; + if (customOp & 8) + op |= 4; // REX.X (whether ModR/M SIB index field refers to R8-R15) - if (indexReg & 8) op |= 2; + if (indexReg & 8) + op |= 2; // REX.B (whether ModR/M rm or SIB base or opcode reg field refers to R8-R15) - if (offsetOrBaseReg & 8) op |= 1; + if (offsetOrBaseReg & 8) + op |= 1; // Write REX if wr have REX bits to write, or if the operation accesses // SIL, DIL, BPL, or SPL. - if (op != 0x40 || - (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || - (opBits == 8 && (customOp & 0x10c) == 4)) - { + if (op != 0x40 || (scale == SCALE_NONE && bits == 8 && (offsetOrBaseReg & 0x10c) == 4) || + (opBits == 8 && (customOp & 0x10c) == 4)) { emit->Write8(op); // Check the operation doesn't access AH, BH, CH, or DH. DEBUG_ASSERT((offsetOrBaseReg & 0x100) == 0); @@ -214,8 +197,8 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const #endif } -void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const -{ +void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, + int W) const { int R = !(regOp1 & 8); int X = !(indexReg & 8); int B = !(offsetOrBaseReg & 8); @@ -223,14 +206,11 @@ void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); // do we need any VEX fields that only appear in the three-byte form? - if (X == 1 && B == 1 && W == 0 && mmmmm == 1) - { + if (X == 1 && B == 1 && W == 0 && mmmmm == 1) { u8 RvvvvLpp = (R << 7) | (vvvv << 3) | (L << 2) | pp; emit->Write8(0xC5); emit->Write8(RvvvvLpp); - } - else - { + } else { u8 RXBmmmmm = (R << 7) | (X << 6) | (B << 5) | mmmmm; u8 WvvvvLpp = (W << 7) | (vvvv << 3) | (L << 2) | pp; emit->Write8(0xC4); @@ -239,31 +219,27 @@ void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp } } -void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, - bool warn_64bit_offset) const -{ +void OpArg::WriteRest(XEmitter* emit, int extraBytes, X64Reg _operandReg, + bool warn_64bit_offset) const { if (_operandReg == INVALID_REG) - _operandReg = (X64Reg)this->operandReg; + _operandReg = (X64Reg) this->operandReg; int mod = 0; int ireg = indexReg; bool SIB = false; int _offsetOrBaseReg = this->offsetOrBaseReg; - if (scale == SCALE_RIP) //Also, on 32-bit, just an immediate address + if (scale == SCALE_RIP) // Also, on 32-bit, just an immediate address { // Oh, RIP addressing. _offsetOrBaseReg = 5; emit->WriteModRM(0, _operandReg, _offsetOrBaseReg); - //TODO : add some checks +// TODO : add some checks #ifdef ARCHITECTURE_x86_64 u64 ripAddr = (u64)emit->GetCodePtr() + 4 + extraBytes; s64 distance = (s64)offset - (s64)ripAddr; - ASSERT_MSG( - (distance < 0x80000000LL && - distance >= -0x80000000LL) || - !warn_64bit_offset, - "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", - ripAddr, offset); + ASSERT_MSG((distance < 0x80000000LL && distance >= -0x80000000LL) || !warn_64bit_offset, + "WriteRest: op out of range (0x%" PRIx64 " uses 0x%" PRIx64 ")", ripAddr, + offset); s32 offs = (s32)distance; emit->Write32((u32)offs); #else @@ -272,66 +248,49 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, return; } - if (scale == 0) - { + if (scale == 0) { // Oh, no memory, Just a reg. - mod = 3; //11 - } - else if (scale >= 1) - { - //Ah good, no scaling. - if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) - { - //Okay, we're good. No SIB necessary. + mod = 3; // 11 + } else if (scale >= 1) { + // Ah good, no scaling. + if (scale == SCALE_ATREG && !((_offsetOrBaseReg & 7) == 4 || (_offsetOrBaseReg & 7) == 5)) { + // Okay, we're good. No SIB necessary. int ioff = (int)offset; - if (ioff == 0) - { + if (ioff == 0) { mod = 0; + } else if (ioff < -128 || ioff > 127) { + mod = 2; // 32-bit displacement + } else { + mod = 1; // 8-bit displacement } - else if (ioff<-128 || ioff>127) - { - mod = 2; //32-bit displacement - } - else - { - mod = 1; //8-bit displacement - } - } - else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) - { + } else if (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8) { SIB = true; mod = 0; _offsetOrBaseReg = 5; - } - else //if (scale != SCALE_ATREG) + } else // if (scale != SCALE_ATREG) { - if ((_offsetOrBaseReg & 7) == 4) //this would occupy the SIB encoding :( + if ((_offsetOrBaseReg & 7) == 4) // this would occupy the SIB encoding :( { - //So we have to fake it with SIB encoding :( + // So we have to fake it with SIB encoding :( SIB = true; } - if (scale >= SCALE_1 && scale < SCALE_ATREG) - { + if (scale >= SCALE_1 && scale < SCALE_ATREG) { SIB = true; } - if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) - { + if (scale == SCALE_ATREG && ((_offsetOrBaseReg & 7) == 4)) { SIB = true; ireg = _offsetOrBaseReg; } - //Okay, we're fine. Just disp encoding. - //We need displacement. Which size? + // Okay, we're fine. Just disp encoding. + // We need displacement. Which size? int ioff = (int)(s64)offset; - if (ioff < -128 || ioff > 127) - { - mod = 2; //32-bit displacement - } - else - { - mod = 1; //8-bit displacement + if (ioff < -128 || ioff > 127) { + mod = 2; // 32-bit displacement + } else { + mod = 1; // 8-bit displacement } } } @@ -343,36 +302,55 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, oreg = 4; // TODO(ector): WTF is this if about? I don't remember writing it :-) - //if (RIP) + // if (RIP) // oreg = 5; - emit->WriteModRM(mod, _operandReg&7, oreg&7); + emit->WriteModRM(mod, _operandReg & 7, oreg & 7); - if (SIB) - { - //SIB byte + if (SIB) { + // SIB byte int ss; - switch (scale) - { - case SCALE_NONE: _offsetOrBaseReg = 4; ss = 0; break; //RSP - case SCALE_1: ss = 0; break; - case SCALE_2: ss = 1; break; - case SCALE_4: ss = 2; break; - case SCALE_8: ss = 3; break; - case SCALE_NOBASE_2: ss = 1; break; - case SCALE_NOBASE_4: ss = 2; break; - case SCALE_NOBASE_8: ss = 3; break; - case SCALE_ATREG: ss = 0; break; - default: ASSERT_MSG(0, "Invalid scale for SIB byte"); ss = 0; break; + switch (scale) { + case SCALE_NONE: + _offsetOrBaseReg = 4; + ss = 0; + break; // RSP + case SCALE_1: + ss = 0; + break; + case SCALE_2: + ss = 1; + break; + case SCALE_4: + ss = 2; + break; + case SCALE_8: + ss = 3; + break; + case SCALE_NOBASE_2: + ss = 1; + break; + case SCALE_NOBASE_4: + ss = 2; + break; + case SCALE_NOBASE_8: + ss = 3; + break; + case SCALE_ATREG: + ss = 0; + break; + default: + ASSERT_MSG(0, "Invalid scale for SIB byte"); + ss = 0; + break; } - emit->Write8((u8)((ss << 6) | ((ireg&7)<<3) | (_offsetOrBaseReg&7))); + emit->Write8((u8)((ss << 6) | ((ireg & 7) << 3) | (_offsetOrBaseReg & 7))); } - if (mod == 1) //8-bit disp + if (mod == 1) // 8-bit disp { emit->Write8((u8)(s8)(s32)offset); - } - else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) //32-bit disp + } else if (mod == 2 || (scale >= SCALE_NOBASE_2 && scale <= SCALE_NOBASE_8)) // 32-bit disp { emit->Write32((u32)offset); } @@ -382,8 +360,7 @@ void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, // R = register# upper bit // X = scale amnt upper bit // B = base register# upper bit -void XEmitter::Rex(int w, int r, int x, int b) -{ +void XEmitter::Rex(int w, int r, int x, int b) { w = w ? 1 : 0; r = r ? 1 : 0; x = x ? 1 : 0; @@ -393,70 +370,60 @@ void XEmitter::Rex(int w, int r, int x, int b) Write8(rx); } -void XEmitter::JMP(const u8* addr, bool force5Bytes) -{ +void XEmitter::JMP(const u8* addr, bool force5Bytes) { u64 fn = (u64)addr; - if (!force5Bytes) - { + if (!force5Bytes) { s64 distance = (s64)(fn - ((u64)code + 2)); ASSERT_MSG(distance >= -0x80 && distance < 0x80, - "Jump target too far away, needs force5Bytes = true"); - //8 bits will do + "Jump target too far away, needs force5Bytes = true"); + // 8 bits will do Write8(0xEB); Write8((u8)(s8)distance); - } - else - { + } else { s64 distance = (s64)(fn - ((u64)code + 5)); - ASSERT_MSG( - distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); Write8(0xE9); Write32((u32)(s32)distance); } } -void XEmitter::JMPptr(const OpArg& arg2) -{ +void XEmitter::JMPptr(const OpArg& arg2) { OpArg arg = arg2; - if (arg.IsImm()) ASSERT_MSG(0, "JMPptr - Imm argument"); + if (arg.IsImm()) + ASSERT_MSG(0, "JMPptr - Imm argument"); arg.operandReg = 4; arg.WriteRex(this, 0, 0); Write8(0xFF); arg.WriteRest(this); } -//Can be used to trap other processors, before overwriting their code +// Can be used to trap other processors, before overwriting their code // not used in dolphin -void XEmitter::JMPself() -{ +void XEmitter::JMPself() { Write8(0xEB); Write8(0xFE); } -void XEmitter::CALLptr(OpArg arg) -{ - if (arg.IsImm()) ASSERT_MSG(0, "CALLptr - Imm argument"); +void XEmitter::CALLptr(OpArg arg) { + if (arg.IsImm()) + ASSERT_MSG(0, "CALLptr - Imm argument"); arg.operandReg = 2; arg.WriteRex(this, 0, 0); Write8(0xFF); arg.WriteRest(this); } -void XEmitter::CALL(const void* fnptr) -{ +void XEmitter::CALL(const void* fnptr) { u64 distance = u64(fnptr) - (u64(code) + 5); - ASSERT_MSG( - distance < 0x0000000080000000ULL || - distance >= 0xFFFFFFFF80000000ULL, - "CALL out of range (%p calls %p)", code, fnptr); + ASSERT_MSG(distance < 0x0000000080000000ULL || distance >= 0xFFFFFFFF80000000ULL, + "CALL out of range (%p calls %p)", code, fnptr); Write8(0xE8); Write32(u32(distance)); } -FixupBranch XEmitter::CALL() -{ +FixupBranch XEmitter::CALL() { FixupBranch branch; branch.type = 1; branch.ptr = code + 5; @@ -467,38 +434,30 @@ FixupBranch XEmitter::CALL() return branch; } -FixupBranch XEmitter::J(bool force5bytes) -{ +FixupBranch XEmitter::J(bool force5bytes) { FixupBranch branch; branch.type = force5bytes ? 1 : 0; branch.ptr = code + (force5bytes ? 5 : 2); - if (!force5bytes) - { - //8 bits will do + if (!force5bytes) { + // 8 bits will do Write8(0xEB); Write8(0); - } - else - { + } else { Write8(0xE9); Write32(0); } return branch; } -FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) -{ +FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) { FixupBranch branch; branch.type = force5bytes ? 1 : 0; branch.ptr = code + (force5bytes ? 6 : 2); - if (!force5bytes) - { - //8 bits will do + if (!force5bytes) { + // 8 bits will do Write8(0x70 + conditionCode); Write8(0); - } - else - { + } else { Write8(0x0F); Write8(0x80 + conditionCode); Write32(0); @@ -506,198 +465,268 @@ FixupBranch XEmitter::J_CC(CCFlags conditionCode, bool force5bytes) return branch; } -void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) -{ +void XEmitter::J_CC(CCFlags conditionCode, const u8* addr, bool force5bytes) { u64 fn = (u64)addr; s64 distance = (s64)(fn - ((u64)code + 2)); - if (distance < -0x80 || distance >= 0x80 || force5bytes) - { + if (distance < -0x80 || distance >= 0x80 || force5bytes) { distance = (s64)(fn - ((u64)code + 6)); - ASSERT_MSG( - distance >= -0x80000000LL && distance < 0x80000000LL, - "Jump target too far away, needs indirect register"); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); Write8(0x0F); Write8(0x80 + conditionCode); Write32((u32)(s32)distance); - } - else - { + } else { Write8(0x70 + conditionCode); Write8((u8)(s8)distance); } } -void XEmitter::SetJumpTarget(const FixupBranch& branch) -{ - if (branch.type == 0) - { +void XEmitter::SetJumpTarget(const FixupBranch& branch) { + if (branch.type == 0) { s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, + "Jump target too far away, needs force5Bytes = true"); branch.ptr[-1] = (u8)(s8)distance; - } - else if (branch.type == 1) - { + } else if (branch.type == 1) { s64 distance = (s64)(code - branch.ptr); - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); ((s32*)branch.ptr)[-1] = (s32)distance; } } -void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) -{ - if (branch.type == 0) - { +void XEmitter::SetJumpTarget(const FixupBranch& branch, const u8* target) { + if (branch.type == 0) { s64 distance = (s64)(target - branch.ptr); - ASSERT_MSG(distance >= -0x80 && distance < 0x80, "Jump target too far away, needs force5Bytes = true"); + ASSERT_MSG(distance >= -0x80 && distance < 0x80, + "Jump target too far away, needs force5Bytes = true"); branch.ptr[-1] = (u8)(s8)distance; - } - else if (branch.type == 1) - { + } else if (branch.type == 1) { s64 distance = (s64)(target - branch.ptr); - ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, "Jump target too far away, needs indirect register"); + ASSERT_MSG(distance >= -0x80000000LL && distance < 0x80000000LL, + "Jump target too far away, needs indirect register"); ((s32*)branch.ptr)[-1] = (s32)distance; } } -//Single byte opcodes -//There is no PUSHAD/POPAD in 64-bit mode. -void XEmitter::INT3() {Write8(0xCC);} -void XEmitter::RET() {Write8(0xC3);} -void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret +// Single byte opcodes +// There is no PUSHAD/POPAD in 64-bit mode. +void XEmitter::INT3() { + Write8(0xCC); +} +void XEmitter::RET() { + Write8(0xC3); +} +void XEmitter::RET_FAST() { + Write8(0xF3); + Write8(0xC3); +} // two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a + // ret // The first sign of decadence: optimized NOPs. -void XEmitter::NOP(size_t size) -{ +void XEmitter::NOP(size_t size) { DEBUG_ASSERT((int)size > 0); - while (true) - { - switch (size) - { + while (true) { + switch (size) { case 0: return; case 1: Write8(0x90); return; case 2: - Write8(0x66); Write8(0x90); + Write8(0x66); + Write8(0x90); return; case 3: - Write8(0x0F); Write8(0x1F); Write8(0x00); + Write8(0x0F); + Write8(0x1F); + Write8(0x00); return; case 4: - Write8(0x0F); Write8(0x1F); Write8(0x40); Write8(0x00); + Write8(0x0F); + Write8(0x1F); + Write8(0x40); + Write8(0x00); return; case 5: - Write8(0x0F); Write8(0x1F); Write8(0x44); Write8(0x00); + Write8(0x0F); + Write8(0x1F); + Write8(0x44); + Write8(0x00); Write8(0x00); return; case 6: - Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x44); - Write8(0x00); Write8(0x00); + Write8(0x66); + Write8(0x0F); + Write8(0x1F); + Write8(0x44); + Write8(0x00); + Write8(0x00); return; case 7: - Write8(0x0F); Write8(0x1F); Write8(0x80); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x0F); + Write8(0x1F); + Write8(0x80); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); return; case 8: - Write8(0x0F); Write8(0x1F); Write8(0x84); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x0F); + Write8(0x1F); + Write8(0x84); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); return; case 9: - Write8(0x66); Write8(0x0F); Write8(0x1F); Write8(0x84); - Write8(0x00); Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x66); + Write8(0x0F); + Write8(0x1F); + Write8(0x84); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); Write8(0x00); return; case 10: - Write8(0x66); Write8(0x66); Write8(0x0F); Write8(0x1F); - Write8(0x84); Write8(0x00); Write8(0x00); Write8(0x00); - Write8(0x00); Write8(0x00); + Write8(0x66); + Write8(0x66); + Write8(0x0F); + Write8(0x1F); + Write8(0x84); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); return; default: // Even though x86 instructions are allowed to be up to 15 bytes long, // AMD advises against using NOPs longer than 11 bytes because they // carry a performance penalty on CPUs older than AMD family 16h. - Write8(0x66); Write8(0x66); Write8(0x66); Write8(0x0F); - Write8(0x1F); Write8(0x84); Write8(0x00); Write8(0x00); - Write8(0x00); Write8(0x00); Write8(0x00); + Write8(0x66); + Write8(0x66); + Write8(0x66); + Write8(0x0F); + Write8(0x1F); + Write8(0x84); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); + Write8(0x00); size -= 11; continue; } } } -void XEmitter::PAUSE() {Write8(0xF3); NOP();} //use in tight spinloops for energy saving on some cpu -void XEmitter::CLC() {CheckFlags(); Write8(0xF8);} //clear carry -void XEmitter::CMC() {CheckFlags(); Write8(0xF5);} //flip carry -void XEmitter::STC() {CheckFlags(); Write8(0xF9);} //set carry +void XEmitter::PAUSE() { + Write8(0xF3); + NOP(); +} // use in tight spinloops for energy saving on some cpu +void XEmitter::CLC() { + CheckFlags(); + Write8(0xF8); +} // clear carry +void XEmitter::CMC() { + CheckFlags(); + Write8(0xF5); +} // flip carry +void XEmitter::STC() { + CheckFlags(); + Write8(0xF9); +} // set carry -//TODO: xchg ah, al ??? -void XEmitter::XCHG_AHAL() -{ +// TODO: xchg ah, al ??? +void XEmitter::XCHG_AHAL() { Write8(0x86); Write8(0xe0); // alt. 86 c4 } -//These two can not be executed on early Intel 64-bit CPU:s, only on AMD! -void XEmitter::LAHF() {Write8(0x9F);} -void XEmitter::SAHF() {CheckFlags(); Write8(0x9E);} +// These two can not be executed on early Intel 64-bit CPU:s, only on AMD! +void XEmitter::LAHF() { + Write8(0x9F); +} +void XEmitter::SAHF() { + CheckFlags(); + Write8(0x9E); +} -void XEmitter::PUSHF() {Write8(0x9C);} -void XEmitter::POPF() {CheckFlags(); Write8(0x9D);} +void XEmitter::PUSHF() { + Write8(0x9C); +} +void XEmitter::POPF() { + CheckFlags(); + Write8(0x9D); +} -void XEmitter::LFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xE8);} -void XEmitter::MFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF0);} -void XEmitter::SFENCE() {Write8(0x0F); Write8(0xAE); Write8(0xF8);} +void XEmitter::LFENCE() { + Write8(0x0F); + Write8(0xAE); + Write8(0xE8); +} +void XEmitter::MFENCE() { + Write8(0x0F); + Write8(0xAE); + Write8(0xF0); +} +void XEmitter::SFENCE() { + Write8(0x0F); + Write8(0xAE); + Write8(0xF8); +} -void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) -{ +void XEmitter::WriteSimple1Byte(int bits, u8 byte, X64Reg reg) { if (bits == 16) Write8(0x66); Rex(bits == 64, 0, 0, (int)reg >> 3); Write8(byte + ((int)reg & 7)); } -void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) -{ +void XEmitter::WriteSimple2Byte(int bits, u8 byte1, u8 byte2, X64Reg reg) { if (bits == 16) Write8(0x66); - Rex(bits==64, 0, 0, (int)reg >> 3); + Rex(bits == 64, 0, 0, (int)reg >> 3); Write8(byte1); Write8(byte2 + ((int)reg & 7)); } -void XEmitter::CWD(int bits) -{ +void XEmitter::CWD(int bits) { if (bits == 16) Write8(0x66); Rex(bits == 64, 0, 0, 0); Write8(0x99); } -void XEmitter::CBW(int bits) -{ +void XEmitter::CBW(int bits) { if (bits == 8) Write8(0x66); Rex(bits == 32, 0, 0, 0); Write8(0x98); } -//Simple opcodes - +// Simple opcodes -//push/pop do not need wide to be 64-bit -void XEmitter::PUSH(X64Reg reg) {WriteSimple1Byte(32, 0x50, reg);} -void XEmitter::POP(X64Reg reg) {WriteSimple1Byte(32, 0x58, reg);} +// push/pop do not need wide to be 64-bit +void XEmitter::PUSH(X64Reg reg) { + WriteSimple1Byte(32, 0x50, reg); +} +void XEmitter::POP(X64Reg reg) { + WriteSimple1Byte(32, 0x58, reg); +} -void XEmitter::PUSH(int bits, const OpArg& reg) -{ +void XEmitter::PUSH(int bits, const OpArg& reg) { if (reg.IsSimpleReg()) PUSH(reg.GetSimpleReg()); - else if (reg.IsImm()) - { - switch (reg.GetImmBits()) - { + else if (reg.IsImm()) { + switch (reg.GetImmBits()) { case 8: Write8(0x6A); Write8((u8)(s8)reg.offset); @@ -715,9 +744,7 @@ void XEmitter::PUSH(int bits, const OpArg& reg) ASSERT_MSG(0, "PUSH - Bad imm bits"); break; } - } - else - { + } else { if (bits == 16) Write8(0x66); reg.WriteRex(this, bits, bits); @@ -726,44 +753,33 @@ void XEmitter::PUSH(int bits, const OpArg& reg) } } -void XEmitter::POP(int /*bits*/, const OpArg& reg) -{ +void XEmitter::POP(int /*bits*/, const OpArg& reg) { if (reg.IsSimpleReg()) POP(reg.GetSimpleReg()); else ASSERT_MSG(0, "POP - Unsupported encoding"); } -void XEmitter::BSWAP(int bits, X64Reg reg) -{ - if (bits >= 32) - { +void XEmitter::BSWAP(int bits, X64Reg reg) { + if (bits >= 32) { WriteSimple2Byte(bits, 0x0F, 0xC8, reg); - } - else if (bits == 16) - { + } else if (bits == 16) { ROL(16, R(reg), Imm8(8)); - } - else if (bits == 8) - { + } else if (bits == 8) { // Do nothing - can't bswap a single byte... - } - else - { + } else { ASSERT_MSG(0, "BSWAP - Wrong number of bits"); } } // Undefined opcode - reserved // If we ever need a way to always cause a non-breakpoint hard exception... -void XEmitter::UD2() -{ +void XEmitter::UD2() { Write8(0x0F); Write8(0x0B); } -void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) -{ +void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) { ASSERT_MSG(!arg.IsImm(), "PREFETCH - Imm argument"); arg.operandReg = (u8)level; arg.WriteRex(this, 0, 0); @@ -772,8 +788,7 @@ void XEmitter::PREFETCH(PrefetchLevel level, OpArg arg) arg.WriteRest(this); } -void XEmitter::SETcc(CCFlags flag, OpArg dest) -{ +void XEmitter::SETcc(CCFlags flag, OpArg dest) { ASSERT_MSG(!dest.IsImm(), "SETcc - Imm argument"); dest.operandReg = 0; dest.WriteRex(this, 0, 8); @@ -782,8 +797,7 @@ void XEmitter::SETcc(CCFlags flag, OpArg dest) dest.WriteRest(this); } -void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) -{ +void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) { ASSERT_MSG(!src.IsImm(), "CMOVcc - Imm argument"); ASSERT_MSG(bits != 8, "CMOVcc - 8 bits unsupported"); if (bits == 16) @@ -795,34 +809,41 @@ void XEmitter::CMOVcc(int bits, X64Reg dest, OpArg src, CCFlags flag) src.WriteRest(this); } -void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) -{ +void XEmitter::WriteMulDivType(int bits, OpArg src, int ext) { ASSERT_MSG(!src.IsImm(), "WriteMulDivType - Imm argument"); CheckFlags(); src.operandReg = ext; if (bits == 16) Write8(0x66); src.WriteRex(this, bits, bits, 0); - if (bits == 8) - { + if (bits == 8) { Write8(0xF6); - } - else - { + } else { Write8(0xF7); } src.WriteRest(this); } -void XEmitter::MUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 4);} -void XEmitter::DIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 6);} -void XEmitter::IMUL(int bits, const OpArg& src) {WriteMulDivType(bits, src, 5);} -void XEmitter::IDIV(int bits, const OpArg& src) {WriteMulDivType(bits, src, 7);} -void XEmitter::NEG(int bits, const OpArg& src) {WriteMulDivType(bits, src, 3);} -void XEmitter::NOT(int bits, const OpArg& src) {WriteMulDivType(bits, src, 2);} +void XEmitter::MUL(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 4); +} +void XEmitter::DIV(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 6); +} +void XEmitter::IMUL(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 5); +} +void XEmitter::IDIV(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 7); +} +void XEmitter::NEG(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 3); +} +void XEmitter::NOT(int bits, const OpArg& src) { + WriteMulDivType(bits, src, 2); +} -void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) -{ +void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bool rep) { ASSERT_MSG(!src.IsImm(), "WriteBitSearchType - Imm argument"); CheckFlags(); src.operandReg = (u8)dest; @@ -836,36 +857,35 @@ void XEmitter::WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2, bo src.WriteRest(this); } -void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) -{ +void XEmitter::MOVNTI(int bits, const OpArg& dest, X64Reg src) { if (bits <= 16) ASSERT_MSG(0, "MOVNTI - bits<=16"); WriteBitSearchType(bits, src, dest, 0xC3); } -void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBC);} // Bottom bit to top bit -void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) {WriteBitSearchType(bits,dest,src,0xBD);} // Top bit to bottom bit +void XEmitter::BSF(int bits, X64Reg dest, const OpArg& src) { + WriteBitSearchType(bits, dest, src, 0xBC); +} // Bottom bit to top bit +void XEmitter::BSR(int bits, X64Reg dest, const OpArg& src) { + WriteBitSearchType(bits, dest, src, 0xBD); +} // Top bit to bottom bit -void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) -{ +void XEmitter::TZCNT(int bits, X64Reg dest, const OpArg& src) { CheckFlags(); if (!Common::GetCPUCaps().bmi1) ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); WriteBitSearchType(bits, dest, src, 0xBC, true); } -void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) -{ +void XEmitter::LZCNT(int bits, X64Reg dest, const OpArg& src) { CheckFlags(); if (!Common::GetCPUCaps().lzcnt) ASSERT_MSG(0, "Trying to use LZCNT on a system that doesn't support it. Bad programmer."); WriteBitSearchType(bits, dest, src, 0xBD, true); } -void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) -{ +void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) { ASSERT_MSG(!src.IsImm(), "MOVSX - Imm argument"); - if (dbits == sbits) - { + if (dbits == sbits) { MOV(dbits, R(dest), src); return; } @@ -873,66 +893,49 @@ void XEmitter::MOVSX(int dbits, int sbits, X64Reg dest, OpArg src) if (dbits == 16) Write8(0x66); src.WriteRex(this, dbits, sbits); - if (sbits == 8) - { + if (sbits == 8) { Write8(0x0F); Write8(0xBE); - } - else if (sbits == 16) - { + } else if (sbits == 16) { Write8(0x0F); Write8(0xBF); - } - else if (sbits == 32 && dbits == 64) - { + } else if (sbits == 32 && dbits == 64) { Write8(0x63); - } - else - { + } else { Crash(); } src.WriteRest(this); } -void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) -{ +void XEmitter::MOVZX(int dbits, int sbits, X64Reg dest, OpArg src) { ASSERT_MSG(!src.IsImm(), "MOVZX - Imm argument"); - if (dbits == sbits) - { + if (dbits == sbits) { MOV(dbits, R(dest), src); return; } src.operandReg = (u8)dest; if (dbits == 16) Write8(0x66); - //the 32bit result is automatically zero extended to 64bit + // the 32bit result is automatically zero extended to 64bit src.WriteRex(this, dbits == 64 ? 32 : dbits, sbits); - if (sbits == 8) - { + if (sbits == 8) { Write8(0x0F); Write8(0xB6); - } - else if (sbits == 16) - { + } else if (sbits == 16) { Write8(0x0F); Write8(0xB7); - } - else if (sbits == 32 && dbits == 64) - { + } else if (sbits == 32 && dbits == 64) { Write8(0x8B); - } - else - { + } else { ASSERT_MSG(0, "MOVZX - Invalid size"); } src.WriteRest(this); } -void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) -{ - ASSERT_MSG(Common::GetCPUCaps().movbe, "Generating MOVBE on a system that does not support it."); - if (bits == 8) - { +void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) { + ASSERT_MSG(Common::GetCPUCaps().movbe, + "Generating MOVBE on a system that does not support it."); + if (bits == 8) { MOV(bits, dest, src); return; } @@ -940,71 +943,60 @@ void XEmitter::MOVBE(int bits, const OpArg& dest, const OpArg& src) if (bits == 16) Write8(0x66); - if (dest.IsSimpleReg()) - { + if (dest.IsSimpleReg()) { ASSERT_MSG(!src.IsSimpleReg() && !src.IsImm(), "MOVBE: Loading from !mem"); src.WriteRex(this, bits, bits, dest.GetSimpleReg()); - Write8(0x0F); Write8(0x38); Write8(0xF0); + Write8(0x0F); + Write8(0x38); + Write8(0xF0); src.WriteRest(this, 0, dest.GetSimpleReg()); - } - else if (src.IsSimpleReg()) - { + } else if (src.IsSimpleReg()) { ASSERT_MSG(!dest.IsSimpleReg() && !dest.IsImm(), "MOVBE: Storing to !mem"); dest.WriteRex(this, bits, bits, src.GetSimpleReg()); - Write8(0x0F); Write8(0x38); Write8(0xF1); + Write8(0x0F); + Write8(0x38); + Write8(0xF1); dest.WriteRest(this, 0, src.GetSimpleReg()); - } - else - { + } else { ASSERT_MSG(0, "MOVBE: Not loading or storing to mem"); } } - -void XEmitter::LEA(int bits, X64Reg dest, OpArg src) -{ +void XEmitter::LEA(int bits, X64Reg dest, OpArg src) { ASSERT_MSG(!src.IsImm(), "LEA - Imm argument"); src.operandReg = (u8)dest; if (bits == 16) - Write8(0x66); //TODO: performance warning + Write8(0x66); // TODO: performance warning src.WriteRex(this, bits, bits); Write8(0x8D); src.WriteRest(this, 0, INVALID_REG, bits == 64); } -//shift can be either imm8 or cl -void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) -{ +// shift can be either imm8 or cl +void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) { CheckFlags(); bool writeImm = false; - if (dest.IsImm()) - { + if (dest.IsImm()) { ASSERT_MSG(0, "WriteShift - can't shift imms"); } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || + (shift.IsImm() && shift.GetImmBits() != 8)) { ASSERT_MSG(0, "WriteShift - illegal argument"); } dest.operandReg = ext; if (bits == 16) Write8(0x66); dest.WriteRex(this, bits, bits, 0); - if (shift.GetImmBits() == 8) - { - //ok an imm + if (shift.GetImmBits() == 8) { + // ok an imm u8 imm = (u8)shift.offset; - if (imm == 1) - { + if (imm == 1) { Write8(bits == 8 ? 0xD0 : 0xD1); - } - else - { + } else { writeImm = true; Write8(bits == 8 ? 0xC0 : 0xC1); } - } - else - { + } else { Write8(bits == 8 ? 0xD2 : 0xD3); } dest.WriteRest(this, writeImm ? 1 : 0); @@ -1014,116 +1006,125 @@ void XEmitter::WriteShift(int bits, OpArg dest, const OpArg& shift, int ext) // large rotates and shift are slower on intel than amd // intel likes to rotate by 1, and the op is smaller too -void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 0);} -void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 1);} -void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 2);} -void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 3);} -void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 4);} -void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 5);} -void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) {WriteShift(bits, dest, shift, 7);} +void XEmitter::ROL(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 0); +} +void XEmitter::ROR(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 1); +} +void XEmitter::RCL(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 2); +} +void XEmitter::RCR(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 3); +} +void XEmitter::SHL(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 4); +} +void XEmitter::SHR(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 5); +} +void XEmitter::SAR(int bits, const OpArg& dest, const OpArg& shift) { + WriteShift(bits, dest, shift, 7); +} // index can be either imm8 or register, don't use memory destination because it's slow -void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) -{ +void XEmitter::WriteBitTest(int bits, const OpArg& dest, const OpArg& index, int ext) { CheckFlags(); - if (dest.IsImm()) - { + if (dest.IsImm()) { ASSERT_MSG(0, "WriteBitTest - can't test imms"); } - if ((index.IsImm() && index.GetImmBits() != 8)) - { + if ((index.IsImm() && index.GetImmBits() != 8)) { ASSERT_MSG(0, "WriteBitTest - illegal argument"); } if (bits == 16) Write8(0x66); - if (index.IsImm()) - { + if (index.IsImm()) { dest.WriteRex(this, bits, bits); - Write8(0x0F); Write8(0xBA); + Write8(0x0F); + Write8(0xBA); dest.WriteRest(this, 1, (X64Reg)ext); Write8((u8)index.offset); - } - else - { + } else { X64Reg operand = index.GetSimpleReg(); dest.WriteRex(this, bits, bits, operand); - Write8(0x0F); Write8(0x83 + 8*ext); + Write8(0x0F); + Write8(0x83 + 8 * ext); dest.WriteRest(this, 1, operand); } } -void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 4);} -void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 5);} -void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 6);} -void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) {WriteBitTest(bits, dest, index, 7);} +void XEmitter::BT(int bits, const OpArg& dest, const OpArg& index) { + WriteBitTest(bits, dest, index, 4); +} +void XEmitter::BTS(int bits, const OpArg& dest, const OpArg& index) { + WriteBitTest(bits, dest, index, 5); +} +void XEmitter::BTR(int bits, const OpArg& dest, const OpArg& index) { + WriteBitTest(bits, dest, index, 6); +} +void XEmitter::BTC(int bits, const OpArg& dest, const OpArg& index) { + WriteBitTest(bits, dest, index, 7); +} -//shift can be either imm8 or cl -void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) -{ +// shift can be either imm8 or cl +void XEmitter::SHRD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { CheckFlags(); - if (dest.IsImm()) - { + if (dest.IsImm()) { ASSERT_MSG(0, "SHRD - can't use imms as destination"); } - if (!src.IsSimpleReg()) - { + if (!src.IsSimpleReg()) { ASSERT_MSG(0, "SHRD - must use simple register as source"); } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || + (shift.IsImm() && shift.GetImmBits() != 8)) { ASSERT_MSG(0, "SHRD - illegal shift"); } if (bits == 16) Write8(0x66); X64Reg operand = src.GetSimpleReg(); dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) - { - Write8(0x0F); Write8(0xAC); + if (shift.GetImmBits() == 8) { + Write8(0x0F); + Write8(0xAC); dest.WriteRest(this, 1, operand); Write8((u8)shift.offset); - } - else - { - Write8(0x0F); Write8(0xAD); + } else { + Write8(0x0F); + Write8(0xAD); dest.WriteRest(this, 0, operand); } } -void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) -{ +void XEmitter::SHLD(int bits, const OpArg& dest, const OpArg& src, const OpArg& shift) { CheckFlags(); - if (dest.IsImm()) - { + if (dest.IsImm()) { ASSERT_MSG(0, "SHLD - can't use imms as destination"); } - if (!src.IsSimpleReg()) - { + if (!src.IsSimpleReg()) { ASSERT_MSG(0, "SHLD - must use simple register as source"); } - if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || (shift.IsImm() && shift.GetImmBits() != 8)) - { + if ((shift.IsSimpleReg() && shift.GetSimpleReg() != ECX) || + (shift.IsImm() && shift.GetImmBits() != 8)) { ASSERT_MSG(0, "SHLD - illegal shift"); } if (bits == 16) Write8(0x66); X64Reg operand = src.GetSimpleReg(); dest.WriteRex(this, bits, bits, operand); - if (shift.GetImmBits() == 8) - { - Write8(0x0F); Write8(0xA4); + if (shift.GetImmBits() == 8) { + Write8(0x0F); + Write8(0xA4); dest.WriteRest(this, 1, operand); Write8((u8)shift.offset); - } - else - { - Write8(0x0F); Write8(0xA5); + } else { + Write8(0x0F); + Write8(0xA5); dest.WriteRest(this, 0, operand); } } -void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits) -{ +void OpArg::WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg _operandReg, int bits) { if (bits == 16) emit->Write8(0x66); @@ -1133,12 +1134,11 @@ void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bit WriteRest(emit); } -//operand can either be immediate or register -void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& operand, int bits) const -{ +// operand can either be immediate or register +void OpArg::WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, + int bits) const { X64Reg _operandReg; - if (IsImm()) - { + if (IsImm()) { ASSERT_MSG(0, "WriteNormalOp - Imm argument, wrong order"); } @@ -1147,27 +1147,22 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o int immToWrite = 0; - if (operand.IsImm()) - { + if (operand.IsImm()) { WriteRex(emit, bits, bits); - if (!toRM) - { + if (!toRM) { ASSERT_MSG(0, "WriteNormalOp - Writing to Imm (!toRM)"); } - if (operand.scale == SCALE_IMM8 && bits == 8) - { + if (operand.scale == SCALE_IMM8 && bits == 8) { // op al, imm8 - if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) - { + if (!scale && offsetOrBaseReg == AL && normalops[op].eaximm8 != 0xCC) { emit->Write8(normalops[op].eaximm8); emit->Write8((u8)operand.offset); return; } // mov reg, imm8 - if (!scale && op == nrmMOV) - { + if (!scale && op == nrmMOV) { emit->Write8(0xB0 + (offsetOrBaseReg & 7)); emit->Write8((u8)operand.offset); return; @@ -1175,26 +1170,20 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o // op r/m8, imm8 emit->Write8(normalops[op].imm8); immToWrite = 8; - } - else if ((operand.scale == SCALE_IMM16 && bits == 16) || - (operand.scale == SCALE_IMM32 && bits == 32) || - (operand.scale == SCALE_IMM32 && bits == 64)) - { + } else if ((operand.scale == SCALE_IMM16 && bits == 16) || + (operand.scale == SCALE_IMM32 && bits == 32) || + (operand.scale == SCALE_IMM32 && bits == 64)) { // Try to save immediate size if we can, but first check to see // if the instruction supports simm8. // op r/m, imm8 if (normalops[op].simm8 != 0xCC && ((operand.scale == SCALE_IMM16 && (s16)operand.offset == (s8)operand.offset) || - (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) - { + (operand.scale == SCALE_IMM32 && (s32)operand.offset == (s8)operand.offset))) { emit->Write8(normalops[op].simm8); immToWrite = 8; - } - else - { + } else { // mov reg, imm - if (!scale && op == nrmMOV && bits != 64) - { + if (!scale && op == nrmMOV && bits != 64) { emit->Write8(0xB8 + (offsetOrBaseReg & 7)); if (bits == 16) emit->Write16((u16)operand.offset); @@ -1203,8 +1192,7 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o return; } // op eax, imm - if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) - { + if (!scale && offsetOrBaseReg == EAX && normalops[op].eaximm32 != 0xCC) { emit->Write8(normalops[op].eaximm32); if (bits == 16) emit->Write16((u16)operand.offset); @@ -1216,54 +1204,41 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o emit->Write8(normalops[op].imm32); immToWrite = bits == 16 ? 16 : 32; } - } - else if ((operand.scale == SCALE_IMM8 && bits == 16) || - (operand.scale == SCALE_IMM8 && bits == 32) || - (operand.scale == SCALE_IMM8 && bits == 64)) - { + } else if ((operand.scale == SCALE_IMM8 && bits == 16) || + (operand.scale == SCALE_IMM8 && bits == 32) || + (operand.scale == SCALE_IMM8 && bits == 64)) { // op r/m, imm8 emit->Write8(normalops[op].simm8); immToWrite = 8; - } - else if (operand.scale == SCALE_IMM64 && bits == 64) - { - if (scale) - { + } else if (operand.scale == SCALE_IMM64 && bits == 64) { + if (scale) { ASSERT_MSG(0, "WriteNormalOp - MOV with 64-bit imm requres register destination"); } // mov reg64, imm64 - else if (op == nrmMOV) - { + else if (op == nrmMOV) { emit->Write8(0xB8 + (offsetOrBaseReg & 7)); emit->Write64((u64)operand.offset); return; } ASSERT_MSG(0, "WriteNormalOp - Only MOV can take 64-bit imm"); - } - else - { + } else { ASSERT_MSG(0, "WriteNormalOp - Unhandled case"); } - _operandReg = (X64Reg)normalops[op].ext; //pass extension in REG of ModRM - } - else - { + _operandReg = (X64Reg)normalops[op].ext; // pass extension in REG of ModRM + } else { _operandReg = (X64Reg)operand.offsetOrBaseReg; WriteRex(emit, bits, bits, _operandReg); // op r/m, reg - if (toRM) - { + if (toRM) { emit->Write8(bits == 8 ? normalops[op].toRm8 : normalops[op].toRm32); } // op reg, r/m - else - { + else { emit->Write8(bits == 8 ? normalops[op].fromRm8 : normalops[op].fromRm32); } } WriteRest(emit, immToWrite >> 3, _operandReg); - switch (immToWrite) - { + switch (immToWrite) { case 0: break; case 8: @@ -1280,66 +1255,84 @@ void OpArg::WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg& o } } -void XEmitter::WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2) -{ - if (a1.IsImm()) - { - //Booh! Can't write to an imm +void XEmitter::WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, + const OpArg& a2) { + if (a1.IsImm()) { + // Booh! Can't write to an imm ASSERT_MSG(0, "WriteNormalOp - a1 cannot be imm"); return; } - if (a2.IsImm()) - { + if (a2.IsImm()) { a1.WriteNormalOp(emit, true, op, a2, bits); - } - else - { - if (a1.IsSimpleReg()) - { + } else { + if (a1.IsSimpleReg()) { a2.WriteNormalOp(emit, false, op, a1, bits); - } - else - { - ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), "WriteNormalOp - a1 and a2 cannot both be memory"); + } else { + ASSERT_MSG(a2.IsSimpleReg() || a2.IsImm(), + "WriteNormalOp - a1 and a2 cannot both be memory"); a1.WriteNormalOp(emit, true, op, a2, bits); } } } -void XEmitter::ADD (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADD, a1, a2);} -void XEmitter::ADC (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmADC, a1, a2);} -void XEmitter::SUB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSUB, a1, a2);} -void XEmitter::SBB (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmSBB, a1, a2);} -void XEmitter::AND (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmAND, a1, a2);} -void XEmitter::OR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmOR , a1, a2);} -void XEmitter::XOR (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmXOR, a1, a2);} -void XEmitter::MOV (int bits, const OpArg& a1, const OpArg& a2) -{ +void XEmitter::ADD(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmADD, a1, a2); +} +void XEmitter::ADC(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmADC, a1, a2); +} +void XEmitter::SUB(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmSUB, a1, a2); +} +void XEmitter::SBB(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmSBB, a1, a2); +} +void XEmitter::AND(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmAND, a1, a2); +} +void XEmitter::OR(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmOR, a1, a2); +} +void XEmitter::XOR(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmXOR, a1, a2); +} +void XEmitter::MOV(int bits, const OpArg& a1, const OpArg& a2) { if (a1.IsSimpleReg() && a2.IsSimpleReg() && a1.GetSimpleReg() == a2.GetSimpleReg()) LOG_ERROR(Common, "Redundant MOV @ %p - bug in JIT?", code); WriteNormalOp(this, bits, nrmMOV, a1, a2); } -void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmTEST, a1, a2);} -void XEmitter::CMP (int bits, const OpArg& a1, const OpArg& a2) {CheckFlags(); WriteNormalOp(this, bits, nrmCMP, a1, a2);} -void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) {WriteNormalOp(this, bits, nrmXCHG, a1, a2);} +void XEmitter::TEST(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmTEST, a1, a2); +} +void XEmitter::CMP(int bits, const OpArg& a1, const OpArg& a2) { + CheckFlags(); + WriteNormalOp(this, bits, nrmCMP, a1, a2); +} +void XEmitter::XCHG(int bits, const OpArg& a1, const OpArg& a2) { + WriteNormalOp(this, bits, nrmXCHG, a1, a2); +} -void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) -{ +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) { CheckFlags(); - if (bits == 8) - { + if (bits == 8) { ASSERT_MSG(0, "IMUL - illegal bit size!"); return; } - if (a1.IsImm()) - { + if (a1.IsImm()) { ASSERT_MSG(0, "IMUL - second arg cannot be imm!"); return; } - if (!a2.IsImm()) - { + if (!a2.IsImm()) { ASSERT_MSG(0, "IMUL - third arg must be imm!"); return; } @@ -1348,46 +1341,34 @@ void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a1, const OpArg& a2) Write8(0x66); a1.WriteRex(this, bits, bits, regOp); - if (a2.GetImmBits() == 8 || - (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || - (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) - { + if (a2.GetImmBits() == 8 || (a2.GetImmBits() == 16 && (s8)a2.offset == (s16)a2.offset) || + (a2.GetImmBits() == 32 && (s8)a2.offset == (s32)a2.offset)) { Write8(0x6B); a1.WriteRest(this, 1, regOp); Write8((u8)a2.offset); - } - else - { + } else { Write8(0x69); - if (a2.GetImmBits() == 16 && bits == 16) - { + if (a2.GetImmBits() == 16 && bits == 16) { a1.WriteRest(this, 2, regOp); Write16((u16)a2.offset); - } - else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) - { + } else if (a2.GetImmBits() == 32 && (bits == 32 || bits == 64)) { a1.WriteRest(this, 4, regOp); Write32((u32)a2.offset); - } - else - { + } else { ASSERT_MSG(0, "IMUL - unhandled case!"); } } } -void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) -{ +void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) { CheckFlags(); - if (bits == 8) - { + if (bits == 8) { ASSERT_MSG(0, "IMUL - illegal bit size!"); return; } - if (a.IsImm()) - { - IMUL(bits, regOp, R(regOp), a) ; + if (a.IsImm()) { + IMUL(bits, regOp, R(regOp), a); return; } @@ -1399,9 +1380,7 @@ void XEmitter::IMUL(int bits, X64Reg regOp, const OpArg& a) a.WriteRest(this, 0, regOp); } - -void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) -{ +void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extrabytes) { if (opPrefix) Write8(opPrefix); arg.operandReg = regOp; @@ -1413,13 +1392,11 @@ void XEmitter::WriteSSEOp(u8 opPrefix, u16 op, X64Reg regOp, OpArg arg, int extr arg.WriteRest(this, extrabytes); } -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { WriteAVXOp(opPrefix, op, regOp, INVALID_REG, arg, extrabytes); } -static int GetVEXmmmmm(u16 op) -{ +static int GetVEXmmmmm(u16 op) { // Currently, only 0x38 and 0x3A are used as secondary escape byte. if ((op >> 8) == 0x3A) return 3; @@ -1429,8 +1406,7 @@ static int GetVEXmmmmm(u16 op) return 1; } -static int GetVEXpp(u8 opPrefix) -{ +static int GetVEXpp(u8 opPrefix) { if (opPrefix == 0x66) return 1; if (opPrefix == 0xF3) @@ -1441,21 +1417,22 @@ static int GetVEXpp(u8 opPrefix) return 0; } -void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, + int extrabytes) { if (!Common::GetCPUCaps().avx) ASSERT_MSG(0, "Trying to use AVX on a system that doesn't support it. Bad programmer."); int mmmmm = GetVEXmmmmm(op); int pp = GetVEXpp(opPrefix); - // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here + // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size + // here arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm); Write8(op & 0xFF); arg.WriteRest(this, extrabytes, regOp1); } // Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 -void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, + const OpArg& arg, int extrabytes) { if (size != 32 && size != 64) ASSERT_MSG(0, "VEX GPR instructions only support 32-bit and 64-bit modes!"); int mmmmm = GetVEXmmmmm(op); @@ -1465,49 +1442,50 @@ void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg r arg.WriteRest(this, extrabytes, regOp1); } -void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, + const OpArg& arg, int extrabytes) { CheckFlags(); if (!Common::GetCPUCaps().bmi1) ASSERT_MSG(0, "Trying to use BMI1 on a system that doesn't support it. Bad programmer."); WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); } -void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, + const OpArg& arg, int extrabytes) { CheckFlags(); if (!Common::GetCPUCaps().bmi2) ASSERT_MSG(0, "Trying to use BMI2 on a system that doesn't support it. Bad programmer."); WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); } -void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6E, dest, arg, 0);} -void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(0x66, 0x7E, src, arg, 0);} +void XEmitter::MOVD_xmm(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x6E, dest, arg, 0); +} +void XEmitter::MOVD_xmm(const OpArg& arg, X64Reg src) { + WriteSSEOp(0x66, 0x7E, src, arg, 0); +} -void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) -{ +void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) { #ifdef ARCHITECTURE_x86_64 - // Alternate encoding - // This does not display correctly in MSVC's debugger, it thinks it's a MOVD - arg.operandReg = dest; - Write8(0x66); - arg.WriteRex(this, 64, 0); - Write8(0x0f); - Write8(0x6E); - arg.WriteRest(this, 0); + // Alternate encoding + // This does not display correctly in MSVC's debugger, it thinks it's a MOVD + arg.operandReg = dest; + Write8(0x66); + arg.WriteRex(this, 64, 0); + Write8(0x0f); + Write8(0x6E); + arg.WriteRest(this, 0); #else - arg.operandReg = dest; - Write8(0xF3); - Write8(0x0f); - Write8(0x7E); - arg.WriteRest(this, 0); + arg.operandReg = dest; + Write8(0xF3); + Write8(0x0f); + Write8(0x7E); + arg.WriteRest(this, 0); #endif } -void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) -{ - if (src > 7 || arg.IsSimpleReg()) - { +void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) { + if (src > 7 || arg.IsSimpleReg()) { // Alternate encoding // This does not display correctly in MSVC's debugger, it thinks it's a MOVD arg.operandReg = src; @@ -1516,9 +1494,7 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) Write8(0x0f); Write8(0x7E); arg.WriteRest(this, 0); - } - else - { + } else { arg.operandReg = src; arg.WriteRex(this, 0, 0); Write8(0x66); @@ -1528,8 +1504,7 @@ void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) } } -void XEmitter::WriteMXCSR(OpArg arg, int ext) -{ +void XEmitter::WriteMXCSR(OpArg arg, int ext) { if (arg.IsImm() || arg.IsSimpleReg()) ASSERT_MSG(0, "MXCSR - invalid operand"); @@ -1540,143 +1515,357 @@ void XEmitter::WriteMXCSR(OpArg arg, int ext) arg.WriteRest(this); } -void XEmitter::STMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 3);} -void XEmitter::LDMXCSR(const OpArg& memloc) {WriteMXCSR(memloc, 2);} - -void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg);} -void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVNTP, regOp, arg);} -void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVNTP, regOp, arg);} - -void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseADD, regOp, arg);} -void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseADD, regOp, arg);} -void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSUB, regOp, arg);} -void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSUB, regOp, arg);} -void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMUL, regOp, arg);} -void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMUL, regOp, arg);} -void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseDIV, regOp, arg);} -void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseDIV, regOp, arg);} -void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMIN, regOp, arg);} -void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMIN, regOp, arg);} -void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMAX, regOp, arg);} -void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMAX, regOp, arg);} -void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseSQRT, regOp, arg);} -void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseSQRT, regOp, arg);} -void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRCP, regOp, arg);} -void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseRSQRT, regOp, arg);} - -void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseADD, regOp, arg);} -void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseADD, regOp, arg);} -void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSUB, regOp, arg);} -void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSUB, regOp, arg);} -void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x00, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) {WriteSSEOp(0x66, sseCMP, regOp, arg, 1); Write8(compare);} -void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseAND, regOp, arg);} -void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseAND, regOp, arg);} -void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseANDN, regOp, arg);} -void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseANDN, regOp, arg);} -void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseOR, regOp, arg);} -void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseOR, regOp, arg);} -void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseXOR, regOp, arg);} -void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseXOR, regOp, arg);} -void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMUL, regOp, arg);} -void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMUL, regOp, arg);} -void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseDIV, regOp, arg);} -void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseDIV, regOp, arg);} -void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMIN, regOp, arg);} -void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMIN, regOp, arg);} -void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMAX, regOp, arg);} -void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMAX, regOp, arg);} -void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseSQRT, regOp, arg);} -void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseSQRT, regOp, arg);} -void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseRCP, regOp, arg); } -void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseRSQRT, regOp, arg);} -void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x00, sseSHUF, regOp, arg,1); Write8(shuffle);} -void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, sseSHUF, regOp, arg,1); Write8(shuffle);} - -void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseHADD, regOp, arg);} - -void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseCOMIS, regOp, arg);} //weird that these should be packed -void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseCOMIS, regOp, arg);} //ordered -void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseUCOMIS, regOp, arg);} //unordered -void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseUCOMIS, regOp, arg);} - -void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg);} -void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg);} -void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg);} - -void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg);} -void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg);} -void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg);} - -void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg);} -void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg);} -void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) {WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg);} - -void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); } -void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); } -void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); } - -void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); } -void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); } -void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); } - -void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2));} -void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) {WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2));} - -void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5A, regOp, arg);} -void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5A, regOp, arg);} - -void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x5A, regOp, arg);} -void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5A, regOp, arg);} -void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2D, regOp, arg);} -void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2D, regOp, arg);} -void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2A, regOp, arg);} -void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2A, regOp, arg);} - -void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0xE6, regOp, arg);} -void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x00, 0x5B, regOp, arg);} -void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0xE6, regOp, arg);} -void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0x5B, regOp, arg);} - -void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF2, 0x2C, regOp, arg);} -void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x2C, regOp, arg);} -void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0xF3, 0x5B, regOp, arg);} -void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) {WriteSSEOp(0x66, 0xE6, regOp, arg);} - -void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src));} - -void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x50, dest, arg);} -void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x50, dest, arg);} - -void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only +void XEmitter::STMXCSR(const OpArg& memloc) { + WriteMXCSR(memloc, 3); +} +void XEmitter::LDMXCSR(const OpArg& memloc) { + WriteMXCSR(memloc, 2); +} + +void XEmitter::MOVNTDQ(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVNTDQ, regOp, arg); +} +void XEmitter::MOVNTPS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x00, sseMOVNTP, regOp, arg); +} +void XEmitter::MOVNTPD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVNTP, regOp, arg); +} + +void XEmitter::ADDSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseADD, regOp, arg); +} +void XEmitter::ADDSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseADD, regOp, arg); +} +void XEmitter::SUBSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseSUB, regOp, arg); +} +void XEmitter::SUBSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseSUB, regOp, arg); +} +void XEmitter::CMPSS(X64Reg regOp, const OpArg& arg, u8 compare) { + WriteSSEOp(0xF3, sseCMP, regOp, arg, 1); + Write8(compare); +} +void XEmitter::CMPSD(X64Reg regOp, const OpArg& arg, u8 compare) { + WriteSSEOp(0xF2, sseCMP, regOp, arg, 1); + Write8(compare); +} +void XEmitter::MULSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseMUL, regOp, arg); +} +void XEmitter::MULSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseMUL, regOp, arg); +} +void XEmitter::DIVSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseDIV, regOp, arg); +} +void XEmitter::DIVSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseDIV, regOp, arg); +} +void XEmitter::MINSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseMIN, regOp, arg); +} +void XEmitter::MINSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseMIN, regOp, arg); +} +void XEmitter::MAXSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseMAX, regOp, arg); +} +void XEmitter::MAXSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseMAX, regOp, arg); +} +void XEmitter::SQRTSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseSQRT, regOp, arg); +} +void XEmitter::SQRTSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseSQRT, regOp, arg); +} +void XEmitter::RCPSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseRCP, regOp, arg); +} +void XEmitter::RSQRTSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseRSQRT, regOp, arg); +} + +void XEmitter::ADDPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseADD, regOp, arg); +} +void XEmitter::ADDPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseADD, regOp, arg); +} +void XEmitter::SUBPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseSUB, regOp, arg); +} +void XEmitter::SUBPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseSUB, regOp, arg); +} +void XEmitter::CMPPS(X64Reg regOp, const OpArg& arg, u8 compare) { + WriteSSEOp(0x00, sseCMP, regOp, arg, 1); + Write8(compare); +} +void XEmitter::CMPPD(X64Reg regOp, const OpArg& arg, u8 compare) { + WriteSSEOp(0x66, sseCMP, regOp, arg, 1); + Write8(compare); +} +void XEmitter::ANDPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseAND, regOp, arg); +} +void XEmitter::ANDPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseAND, regOp, arg); +} +void XEmitter::ANDNPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseANDN, regOp, arg); +} +void XEmitter::ANDNPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseANDN, regOp, arg); +} +void XEmitter::ORPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseOR, regOp, arg); +} +void XEmitter::ORPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseOR, regOp, arg); +} +void XEmitter::XORPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseXOR, regOp, arg); +} +void XEmitter::XORPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseXOR, regOp, arg); +} +void XEmitter::MULPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMUL, regOp, arg); +} +void XEmitter::MULPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMUL, regOp, arg); +} +void XEmitter::DIVPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseDIV, regOp, arg); +} +void XEmitter::DIVPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseDIV, regOp, arg); +} +void XEmitter::MINPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMIN, regOp, arg); +} +void XEmitter::MINPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMIN, regOp, arg); +} +void XEmitter::MAXPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMAX, regOp, arg); +} +void XEmitter::MAXPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMAX, regOp, arg); +} +void XEmitter::SQRTPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseSQRT, regOp, arg); +} +void XEmitter::SQRTPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseSQRT, regOp, arg); +} +void XEmitter::RCPPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseRCP, regOp, arg); +} +void XEmitter::RSQRTPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseRSQRT, regOp, arg); +} +void XEmitter::SHUFPS(X64Reg regOp, const OpArg& arg, u8 shuffle) { + WriteSSEOp(0x00, sseSHUF, regOp, arg, 1); + Write8(shuffle); +} +void XEmitter::SHUFPD(X64Reg regOp, const OpArg& arg, u8 shuffle) { + WriteSSEOp(0x66, sseSHUF, regOp, arg, 1); + Write8(shuffle); +} + +void XEmitter::HADDPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseHADD, regOp, arg); +} + +void XEmitter::COMISS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseCOMIS, regOp, arg); +} // weird that these should be packed +void XEmitter::COMISD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseCOMIS, regOp, arg); +} // ordered +void XEmitter::UCOMISS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseUCOMIS, regOp, arg); +} // unordered +void XEmitter::UCOMISD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseUCOMIS, regOp, arg); +} + +void XEmitter::MOVAPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMOVAPfromRM, regOp, arg); +} +void XEmitter::MOVAPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMOVAPfromRM, regOp, arg); +} +void XEmitter::MOVAPS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x00, sseMOVAPtoRM, regOp, arg); +} +void XEmitter::MOVAPD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVAPtoRM, regOp, arg); +} + +void XEmitter::MOVUPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMOVUPfromRM, regOp, arg); +} +void XEmitter::MOVUPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMOVUPfromRM, regOp, arg); +} +void XEmitter::MOVUPS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x00, sseMOVUPtoRM, regOp, arg); +} +void XEmitter::MOVUPD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVUPtoRM, regOp, arg); +} + +void XEmitter::MOVDQA(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMOVDQfromRM, regOp, arg); +} +void XEmitter::MOVDQA(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVDQtoRM, regOp, arg); +} +void XEmitter::MOVDQU(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseMOVDQfromRM, regOp, arg); +} +void XEmitter::MOVDQU(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0xF3, sseMOVDQtoRM, regOp, arg); +} + +void XEmitter::MOVSS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, sseMOVUPfromRM, regOp, arg); +} +void XEmitter::MOVSD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, sseMOVUPfromRM, regOp, arg); +} +void XEmitter::MOVSS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0xF3, sseMOVUPtoRM, regOp, arg); +} +void XEmitter::MOVSD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0xF2, sseMOVUPtoRM, regOp, arg); +} + +void XEmitter::MOVLPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMOVLPfromRM, regOp, arg); +} +void XEmitter::MOVLPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMOVLPfromRM, regOp, arg); +} +void XEmitter::MOVLPS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x00, sseMOVLPtoRM, regOp, arg); +} +void XEmitter::MOVLPD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVLPtoRM, regOp, arg); +} + +void XEmitter::MOVHPS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, sseMOVHPfromRM, regOp, arg); +} +void XEmitter::MOVHPD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, sseMOVHPfromRM, regOp, arg); +} +void XEmitter::MOVHPS(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x00, sseMOVHPtoRM, regOp, arg); +} +void XEmitter::MOVHPD(const OpArg& arg, X64Reg regOp) { + WriteSSEOp(0x66, sseMOVHPtoRM, regOp, arg); +} + +void XEmitter::MOVHLPS(X64Reg regOp1, X64Reg regOp2) { + WriteSSEOp(0x00, sseMOVHLPS, regOp1, R(regOp2)); +} +void XEmitter::MOVLHPS(X64Reg regOp1, X64Reg regOp2) { + WriteSSEOp(0x00, sseMOVLHPS, regOp1, R(regOp2)); +} + +void XEmitter::CVTPS2PD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, 0x5A, regOp, arg); +} +void XEmitter::CVTPD2PS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, 0x5A, regOp, arg); +} + +void XEmitter::CVTSD2SS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, 0x5A, regOp, arg); +} +void XEmitter::CVTSS2SD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0x5A, regOp, arg); +} +void XEmitter::CVTSD2SI(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, 0x2D, regOp, arg); +} +void XEmitter::CVTSS2SI(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0x2D, regOp, arg); +} +void XEmitter::CVTSI2SD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, 0x2A, regOp, arg); +} +void XEmitter::CVTSI2SS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0x2A, regOp, arg); +} + +void XEmitter::CVTDQ2PD(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0xE6, regOp, arg); +} +void XEmitter::CVTDQ2PS(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x00, 0x5B, regOp, arg); +} +void XEmitter::CVTPD2DQ(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, 0xE6, regOp, arg); +} +void XEmitter::CVTPS2DQ(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, 0x5B, regOp, arg); +} + +void XEmitter::CVTTSD2SI(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF2, 0x2C, regOp, arg); +} +void XEmitter::CVTTSS2SI(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0x2C, regOp, arg); +} +void XEmitter::CVTTPS2DQ(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0xF3, 0x5B, regOp, arg); +} +void XEmitter::CVTTPD2DQ(X64Reg regOp, const OpArg& arg) { + WriteSSEOp(0x66, 0xE6, regOp, arg); +} + +void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) { + WriteSSEOp(0x66, sseMASKMOVDQU, dest, R(src)); +} + +void XEmitter::MOVMSKPS(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x00, 0x50, dest, arg); +} +void XEmitter::MOVMSKPD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x50, dest, arg); +} + +void XEmitter::LDDQU(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0xF2, sseLDDQU, dest, arg); +} // For integer data only // THESE TWO ARE UNTESTED. -void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x14, dest, arg);} -void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x00, 0x15, dest, arg);} +void XEmitter::UNPCKLPS(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x00, 0x14, dest, arg); +} +void XEmitter::UNPCKHPS(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x00, 0x15, dest, arg); +} -void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x14, dest, arg);} -void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x15, dest, arg);} +void XEmitter::UNPCKLPD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x14, dest, arg); +} +void XEmitter::UNPCKHPD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x15, dest, arg); +} -void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) -{ - if (Common::GetCPUCaps().sse3) - { - WriteSSEOp(0xF2, 0x12, regOp, arg); //SSE3 movddup - } - else - { +void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) { + if (Common::GetCPUCaps().sse3) { + WriteSSEOp(0xF2, 0x12, regOp, arg); // SSE3 movddup + } else { // Simulate this instruction with SSE2 instructions if (!arg.IsSimpleReg(regOp)) MOVSD(regOp, arg); @@ -1684,38 +1873,48 @@ void XEmitter::MOVDDUP(X64Reg regOp, const OpArg& arg) } } -//There are a few more left +// There are a few more left // Also some integer instructions are missing -void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x6B, dest, arg);} -void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x63, dest, arg);} -void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x67, dest, arg);} +void XEmitter::PACKSSDW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x6B, dest, arg); +} +void XEmitter::PACKSSWB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x63, dest, arg); +} +void XEmitter::PACKUSWB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x67, dest, arg); +} -void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x60, dest, arg);} -void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x61, dest, arg);} -void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x62, dest, arg);} -void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg &arg) {WriteSSEOp(0x66, 0x6C, dest, arg);} +void XEmitter::PUNPCKLBW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x60, dest, arg); +} +void XEmitter::PUNPCKLWD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x61, dest, arg); +} +void XEmitter::PUNPCKLDQ(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x62, dest, arg); +} +void XEmitter::PUNPCKLQDQ(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x6C, dest, arg); +} -void XEmitter::PSRLW(X64Reg reg, int shift) -{ +void XEmitter::PSRLW(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x71, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSRLD(X64Reg reg, int shift) -{ +void XEmitter::PSRLD(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x72, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSRLQ(X64Reg reg, int shift) -{ +void XEmitter::PSRLQ(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x73, (X64Reg)2, R(reg)); Write8(shift); } -void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) -{ +void XEmitter::PSRLQ(X64Reg reg, const OpArg& arg) { WriteSSEOp(0x66, 0xd3, reg, arg); } @@ -1724,20 +1923,17 @@ void XEmitter::PSRLDQ(X64Reg reg, int shift) { Write8(shift); } -void XEmitter::PSLLW(X64Reg reg, int shift) -{ +void XEmitter::PSLLW(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x71, (X64Reg)6, R(reg)); Write8(shift); } -void XEmitter::PSLLD(X64Reg reg, int shift) -{ +void XEmitter::PSLLD(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x72, (X64Reg)6, R(reg)); Write8(shift); } -void XEmitter::PSLLQ(X64Reg reg, int shift) -{ +void XEmitter::PSLLQ(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x73, (X64Reg)6, R(reg)); Write8(shift); } @@ -1747,267 +1943,643 @@ void XEmitter::PSLLDQ(X64Reg reg, int shift) { Write8(shift); } -void XEmitter::PSRAW(X64Reg reg, int shift) -{ +void XEmitter::PSRAW(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x71, (X64Reg)4, R(reg)); Write8(shift); } -void XEmitter::PSRAD(X64Reg reg, int shift) -{ +void XEmitter::PSRAD(X64Reg reg, int shift) { WriteSSEOp(0x66, 0x72, (X64Reg)4, R(reg)); Write8(shift); } -void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { if (!Common::GetCPUCaps().ssse3) ASSERT_MSG(0, "Trying to use SSSE3 on a system that doesn't support it. Bad programmer."); WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); } -void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) -{ +void XEmitter::WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes) { if (!Common::GetCPUCaps().sse4_1) ASSERT_MSG(0, "Trying to use SSE4.1 on a system that doesn't support it. Bad programmer."); WriteSSEOp(opPrefix, op, regOp, arg, extrabytes); } -void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) {WriteSSSE3Op(0x66, 0x3800, dest, arg);} -void XEmitter::PTEST(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);} -void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} -void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} - -void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} -void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} -void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} -void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383b, dest, arg);} -void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383c, dest, arg);} -void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383d, dest, arg);} -void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383e, dest, arg);} -void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x383f, dest, arg);} - -void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3820, dest, arg);} -void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3821, dest, arg);} -void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3822, dest, arg);} -void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3823, dest, arg);} -void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3824, dest, arg);} -void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3825, dest, arg);} -void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3830, dest, arg);} -void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3831, dest, arg);} -void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3832, dest, arg);} -void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3833, dest, arg);} -void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3834, dest, arg);} -void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3835, dest, arg);} - -void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3810, dest, arg);} -void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3814, dest, arg);} -void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) {WriteSSE41Op(0x66, 0x3815, dest, arg);} -void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); Write8(blend); } -void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); Write8(blend); } - -void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); Write8(mode);} -void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) {WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); Write8(mode);} - -void XEmitter::PAND(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDB, dest, arg);} -void XEmitter::PANDN(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDF, dest, arg);} -void XEmitter::PXOR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEF, dest, arg);} -void XEmitter::POR(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEB, dest, arg);} - -void XEmitter::PADDB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFC, dest, arg);} -void XEmitter::PADDW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFD, dest, arg);} -void XEmitter::PADDD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFE, dest, arg);} -void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD4, dest, arg);} - -void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEC, dest, arg);} -void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xED, dest, arg);} -void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDC, dest, arg);} -void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDD, dest, arg);} - -void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF8, dest, arg);} -void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF9, dest, arg);} -void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFA, dest, arg);} -void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xFB, dest, arg);} - -void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE8, dest, arg);} -void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE9, dest, arg);} -void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD8, dest, arg);} -void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD9, dest, arg);} - -void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE0, dest, arg);} -void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xE3, dest, arg);} - -void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x74, dest, arg);} -void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x75, dest, arg);} -void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x76, dest, arg);} - -void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x64, dest, arg);} -void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x65, dest, arg);} -void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0x66, dest, arg);} - -void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC5, dest, arg, 1); Write8(subreg);} -void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) {WriteSSEOp(0x66, 0xC4, dest, arg, 1); Write8(subreg);} - -void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF5, dest, arg); } -void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xF6, dest, arg);} - -void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEE, dest, arg); } -void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDE, dest, arg); } -void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xEA, dest, arg); } -void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xDA, dest, arg); } - -void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) {WriteSSEOp(0x66, 0xD7, dest, arg); } -void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0x66, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF2, 0x70, regOp, arg, 1); Write8(shuffle);} -void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) {WriteSSEOp(0xF3, 0x70, regOp, arg, 1); Write8(shuffle);} +void XEmitter::PSHUFB(X64Reg dest, const OpArg& arg) { + WriteSSSE3Op(0x66, 0x3800, dest, arg); +} +void XEmitter::PTEST(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3817, dest, arg); +} +void XEmitter::PACKUSDW(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x382b, dest, arg); +} +void XEmitter::DPPS(X64Reg dest, const OpArg& arg, u8 mask) { + WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); + Write8(mask); +} + +void XEmitter::PMINSB(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3838, dest, arg); +} +void XEmitter::PMINSD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3839, dest, arg); +} +void XEmitter::PMINUW(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383a, dest, arg); +} +void XEmitter::PMINUD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383b, dest, arg); +} +void XEmitter::PMAXSB(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383c, dest, arg); +} +void XEmitter::PMAXSD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383d, dest, arg); +} +void XEmitter::PMAXUW(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383e, dest, arg); +} +void XEmitter::PMAXUD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x383f, dest, arg); +} + +void XEmitter::PMOVSXBW(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3820, dest, arg); +} +void XEmitter::PMOVSXBD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3821, dest, arg); +} +void XEmitter::PMOVSXBQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3822, dest, arg); +} +void XEmitter::PMOVSXWD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3823, dest, arg); +} +void XEmitter::PMOVSXWQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3824, dest, arg); +} +void XEmitter::PMOVSXDQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3825, dest, arg); +} +void XEmitter::PMOVZXBW(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3830, dest, arg); +} +void XEmitter::PMOVZXBD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3831, dest, arg); +} +void XEmitter::PMOVZXBQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3832, dest, arg); +} +void XEmitter::PMOVZXWD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3833, dest, arg); +} +void XEmitter::PMOVZXWQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3834, dest, arg); +} +void XEmitter::PMOVZXDQ(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3835, dest, arg); +} + +void XEmitter::PBLENDVB(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3810, dest, arg); +} +void XEmitter::BLENDVPS(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3814, dest, arg); +} +void XEmitter::BLENDVPD(X64Reg dest, const OpArg& arg) { + WriteSSE41Op(0x66, 0x3815, dest, arg); +} +void XEmitter::BLENDPS(X64Reg dest, const OpArg& arg, u8 blend) { + WriteSSE41Op(0x66, 0x3A0C, dest, arg, 1); + Write8(blend); +} +void XEmitter::BLENDPD(X64Reg dest, const OpArg& arg, u8 blend) { + WriteSSE41Op(0x66, 0x3A0D, dest, arg, 1); + Write8(blend); +} + +void XEmitter::ROUNDSS(X64Reg dest, const OpArg& arg, u8 mode) { + WriteSSE41Op(0x66, 0x3A0A, dest, arg, 1); + Write8(mode); +} +void XEmitter::ROUNDSD(X64Reg dest, const OpArg& arg, u8 mode) { + WriteSSE41Op(0x66, 0x3A0B, dest, arg, 1); + Write8(mode); +} +void XEmitter::ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode) { + WriteSSE41Op(0x66, 0x3A08, dest, arg, 1); + Write8(mode); +} +void XEmitter::ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode) { + WriteSSE41Op(0x66, 0x3A09, dest, arg, 1); + Write8(mode); +} + +void XEmitter::PAND(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDB, dest, arg); +} +void XEmitter::PANDN(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDF, dest, arg); +} +void XEmitter::PXOR(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xEF, dest, arg); +} +void XEmitter::POR(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xEB, dest, arg); +} + +void XEmitter::PADDB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xFC, dest, arg); +} +void XEmitter::PADDW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xFD, dest, arg); +} +void XEmitter::PADDD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xFE, dest, arg); +} +void XEmitter::PADDQ(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xD4, dest, arg); +} + +void XEmitter::PADDSB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xEC, dest, arg); +} +void XEmitter::PADDSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xED, dest, arg); +} +void XEmitter::PADDUSB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDC, dest, arg); +} +void XEmitter::PADDUSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDD, dest, arg); +} + +void XEmitter::PSUBB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xF8, dest, arg); +} +void XEmitter::PSUBW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xF9, dest, arg); +} +void XEmitter::PSUBD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xFA, dest, arg); +} +void XEmitter::PSUBQ(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xFB, dest, arg); +} + +void XEmitter::PSUBSB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xE8, dest, arg); +} +void XEmitter::PSUBSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xE9, dest, arg); +} +void XEmitter::PSUBUSB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xD8, dest, arg); +} +void XEmitter::PSUBUSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xD9, dest, arg); +} + +void XEmitter::PAVGB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xE0, dest, arg); +} +void XEmitter::PAVGW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xE3, dest, arg); +} + +void XEmitter::PCMPEQB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x74, dest, arg); +} +void XEmitter::PCMPEQW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x75, dest, arg); +} +void XEmitter::PCMPEQD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x76, dest, arg); +} + +void XEmitter::PCMPGTB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x64, dest, arg); +} +void XEmitter::PCMPGTW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x65, dest, arg); +} +void XEmitter::PCMPGTD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0x66, dest, arg); +} + +void XEmitter::PEXTRW(X64Reg dest, const OpArg& arg, u8 subreg) { + WriteSSEOp(0x66, 0xC5, dest, arg, 1); + Write8(subreg); +} +void XEmitter::PINSRW(X64Reg dest, const OpArg& arg, u8 subreg) { + WriteSSEOp(0x66, 0xC4, dest, arg, 1); + Write8(subreg); +} + +void XEmitter::PMADDWD(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xF5, dest, arg); +} +void XEmitter::PSADBW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xF6, dest, arg); +} + +void XEmitter::PMAXSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xEE, dest, arg); +} +void XEmitter::PMAXUB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDE, dest, arg); +} +void XEmitter::PMINSW(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xEA, dest, arg); +} +void XEmitter::PMINUB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xDA, dest, arg); +} + +void XEmitter::PMOVMSKB(X64Reg dest, const OpArg& arg) { + WriteSSEOp(0x66, 0xD7, dest, arg); +} +void XEmitter::PSHUFD(X64Reg regOp, const OpArg& arg, u8 shuffle) { + WriteSSEOp(0x66, 0x70, regOp, arg, 1); + Write8(shuffle); +} +void XEmitter::PSHUFLW(X64Reg regOp, const OpArg& arg, u8 shuffle) { + WriteSSEOp(0xF2, 0x70, regOp, arg, 1); + Write8(shuffle); +} +void XEmitter::PSHUFHW(X64Reg regOp, const OpArg& arg, u8 shuffle) { + WriteSSEOp(0xF3, 0x70, regOp, arg, 1); + Write8(shuffle); +} // VEX -void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg);} -void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg);} -void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg);} -void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg);} -void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg);} -void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) {WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); Write8(shuffle);} -void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg);} -void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg){WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg);} - -void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); } -void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); } -void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); } -void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); } -void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); } -void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); } - -void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); } -void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); } -void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); } -void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); } - -void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); } -void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); } -void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); } -void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); } -void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); } -void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); } -void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); } -void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); } -void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); } -void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); } -void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); } -void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); } -void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); } -void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); } -void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); } -void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); } - -void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} -void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} -void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} -void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} -void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} -void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} -void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} -void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} +void XEmitter::VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0xF2, sseADD, regOp1, regOp2, arg); +} +void XEmitter::VSUBSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0xF2, sseSUB, regOp1, regOp2, arg); +} +void XEmitter::VMULSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0xF2, sseMUL, regOp1, regOp2, arg); +} +void XEmitter::VDIVSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0xF2, sseDIV, regOp1, regOp2, arg); +} +void XEmitter::VADDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseADD, regOp1, regOp2, arg); +} +void XEmitter::VSUBPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseSUB, regOp1, regOp2, arg); +} +void XEmitter::VMULPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseMUL, regOp1, regOp2, arg); +} +void XEmitter::VDIVPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseDIV, regOp1, regOp2, arg); +} +void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0xF2, sseSQRT, regOp1, regOp2, arg); +} +void XEmitter::VSHUFPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg, u8 shuffle) { + WriteAVXOp(0x66, sseSHUF, regOp1, regOp2, arg, 1); + Write8(shuffle); +} +void XEmitter::VUNPCKLPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x14, regOp1, regOp2, arg); +} +void XEmitter::VUNPCKHPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x15, regOp1, regOp2, arg); +} + +void XEmitter::VANDPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x00, sseAND, regOp1, regOp2, arg); +} +void XEmitter::VANDPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseAND, regOp1, regOp2, arg); +} +void XEmitter::VANDNPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x00, sseANDN, regOp1, regOp2, arg); +} +void XEmitter::VANDNPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseANDN, regOp1, regOp2, arg); +} +void XEmitter::VORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x00, sseOR, regOp1, regOp2, arg); +} +void XEmitter::VORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseOR, regOp1, regOp2, arg); +} +void XEmitter::VXORPS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x00, sseXOR, regOp1, regOp2, arg); +} +void XEmitter::VXORPD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, sseXOR, regOp1, regOp2, arg); +} + +void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0xDB, regOp1, regOp2, arg); +} +void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0xDF, regOp1, regOp2, arg); +} +void XEmitter::VPOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0xEB, regOp1, regOp2, arg); +} +void XEmitter::VPXOR(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0xEF, regOp1, regOp2, arg); +} + +void XEmitter::VFMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg); +} +void XEmitter::VFMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg); +} +void XEmitter::VFMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg); +} +void XEmitter::VFMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3898, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A8, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B8, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg); +} +void XEmitter::VFMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg); +} +void XEmitter::VFMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg); +} +void XEmitter::VFMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3899, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A9, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B9, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389A, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AA, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BA, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg); +} +void XEmitter::VFMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389B, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AB, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BB, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389C, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AC, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BC, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg); +} +void XEmitter::VFNMADD132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389D, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AD, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMADD231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BD, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389E, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AE, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BE, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB132SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB213SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB231SS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg); +} +void XEmitter::VFNMSUB132SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x389F, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB213SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38AF, regOp1, regOp2, arg, 1); +} +void XEmitter::VFNMSUB231SD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38BF, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADDSUB132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg); +} +void XEmitter::VFMADDSUB213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg); +} +void XEmitter::VFMADDSUB231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg); +} +void XEmitter::VFMADDSUB132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3896, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADDSUB213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A6, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMADDSUB231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B6, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUBADD132PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg); +} +void XEmitter::VFMSUBADD213PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg); +} +void XEmitter::VFMSUBADD231PS(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg); +} +void XEmitter::VFMSUBADD132PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x3897, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUBADD213PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38A7, regOp1, regOp2, arg, 1); +} +void XEmitter::VFMSUBADD231PD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteAVXOp(0x66, 0x38B7, regOp1, regOp2, arg, 1); +} + +void XEmitter::SARX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { + WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg); +} +void XEmitter::SHLX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { + WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg); +} +void XEmitter::SHRX(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { + WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg); +} +void XEmitter::RORX(int bits, X64Reg regOp, const OpArg& arg, u8 rotate) { + WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); + Write8(rotate); +} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg); +} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg); +} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg); +} +void XEmitter::BZHI(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { + WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg); +} +void XEmitter::BLSR(int bits, X64Reg regOp, const OpArg& arg) { + WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg); +} +void XEmitter::BLSMSK(int bits, X64Reg regOp, const OpArg& arg) { + WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg); +} +void XEmitter::BLSI(int bits, X64Reg regOp, const OpArg& arg) { + WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg); +} +void XEmitter::BEXTR(int bits, X64Reg regOp1, const OpArg& arg, X64Reg regOp2) { + WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg); +} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, const OpArg& arg) { + WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg); +} // Prefixes -void XEmitter::LOCK() { Write8(0xF0); } -void XEmitter::REP() { Write8(0xF3); } -void XEmitter::REPNE() { Write8(0xF2); } -void XEmitter::FSOverride() { Write8(0x64); } -void XEmitter::GSOverride() { Write8(0x65); } +void XEmitter::LOCK() { + Write8(0xF0); +} +void XEmitter::REP() { + Write8(0xF3); +} +void XEmitter::REPNE() { + Write8(0xF2); +} +void XEmitter::FSOverride() { + Write8(0x64); +} +void XEmitter::GSOverride() { + Write8(0x65); +} -void XEmitter::FWAIT() -{ +void XEmitter::FWAIT() { Write8(0x9B); } // TODO: make this more generic -void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) -{ +void XEmitter::WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg) { int mf = 0; - ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), "WriteFloatLoadStore: 80 bits not supported for this instruction"); - switch (bits) - { - case 32: mf = 0; break; - case 64: mf = 4; break; - case 80: mf = 2; break; - default: ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); + ASSERT_MSG(!(bits == 80 && op_80b == floatINVALID), + "WriteFloatLoadStore: 80 bits not supported for this instruction"); + switch (bits) { + case 32: + mf = 0; + break; + case 64: + mf = 4; + break; + case 80: + mf = 2; + break; + default: + ASSERT_MSG(0, "WriteFloatLoadStore: invalid bits (should be 32/64/80)"); } Write8(0xd9 | mf); // x87 instructions use the reg field of the ModR/M byte as opcode: if (bits == 80) op = op_80b; - arg.WriteRest(this, 0, (X64Reg) op); + arg.WriteRest(this, 0, (X64Reg)op); } -void XEmitter::FLD(int bits, const OpArg& src) {WriteFloatLoadStore(bits, floatLD, floatLD80, src);} -void XEmitter::FST(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatST, floatINVALID, dest);} -void XEmitter::FSTP(int bits, const OpArg& dest) {WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest);} -void XEmitter::FNSTSW_AX() { Write8(0xDF); Write8(0xE0); } +void XEmitter::FLD(int bits, const OpArg& src) { + WriteFloatLoadStore(bits, floatLD, floatLD80, src); +} +void XEmitter::FST(int bits, const OpArg& dest) { + WriteFloatLoadStore(bits, floatST, floatINVALID, dest); +} +void XEmitter::FSTP(int bits, const OpArg& dest) { + WriteFloatLoadStore(bits, floatSTP, floatSTP80, dest); +} +void XEmitter::FNSTSW_AX() { + Write8(0xDF); + Write8(0xE0); +} -void XEmitter::RDTSC() { Write8(0x0F); Write8(0x31); } +void XEmitter::RDTSC() { + Write8(0x0F); + Write8(0x31); +} void XCodeBlock::PoisonMemory() { // x86/64: 0xCC = breakpoint memset(region, 0xCC, region_size); } - } diff --git a/src/common/x64/emitter.h b/src/common/x64/emitter.h index 60a77dfe1..467f7812f 100644 --- a/src/common/x64/emitter.h +++ b/src/common/x64/emitter.h @@ -21,8 +21,8 @@ #include "common/assert.h" #include "common/bit_set.h" -#include "common/common_types.h" #include "common/code_block.h" +#include "common/common_types.h" #if defined(ARCHITECTURE_x86_64) && !defined(_ARCH_64) #define _ARCH_64 @@ -34,75 +34,145 @@ #define PTRBITS 32 #endif -namespace Gen -{ - -enum X64Reg -{ - EAX = 0, EBX = 3, ECX = 1, EDX = 2, - ESI = 6, EDI = 7, EBP = 5, ESP = 4, - - RAX = 0, RBX = 3, RCX = 1, RDX = 2, - RSI = 6, RDI = 7, RBP = 5, RSP = 4, - R8 = 8, R9 = 9, R10 = 10,R11 = 11, - R12 = 12,R13 = 13,R14 = 14,R15 = 15, - - AL = 0, BL = 3, CL = 1, DL = 2, - SIL = 6, DIL = 7, BPL = 5, SPL = 4, - AH = 0x104, BH = 0x107, CH = 0x105, DH = 0x106, - - AX = 0, BX = 3, CX = 1, DX = 2, - SI = 6, DI = 7, BP = 5, SP = 4, - - XMM0=0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, - XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, - - YMM0=0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, - YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15, +namespace Gen { + +enum X64Reg { + EAX = 0, + EBX = 3, + ECX = 1, + EDX = 2, + ESI = 6, + EDI = 7, + EBP = 5, + ESP = 4, + + RAX = 0, + RBX = 3, + RCX = 1, + RDX = 2, + RSI = 6, + RDI = 7, + RBP = 5, + RSP = 4, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + + AL = 0, + BL = 3, + CL = 1, + DL = 2, + SIL = 6, + DIL = 7, + BPL = 5, + SPL = 4, + AH = 0x104, + BH = 0x107, + CH = 0x105, + DH = 0x106, + + AX = 0, + BX = 3, + CX = 1, + DX = 2, + SI = 6, + DI = 7, + BP = 5, + SP = 4, + + XMM0 = 0, + XMM1, + XMM2, + XMM3, + XMM4, + XMM5, + XMM6, + XMM7, + XMM8, + XMM9, + XMM10, + XMM11, + XMM12, + XMM13, + XMM14, + XMM15, + + YMM0 = 0, + YMM1, + YMM2, + YMM3, + YMM4, + YMM5, + YMM6, + YMM7, + YMM8, + YMM9, + YMM10, + YMM11, + YMM12, + YMM13, + YMM14, + YMM15, INVALID_REG = 0xFFFFFFFF }; -enum CCFlags -{ - CC_O = 0, - CC_NO = 1, - CC_B = 2, CC_C = 2, CC_NAE = 2, - CC_NB = 3, CC_NC = 3, CC_AE = 3, - CC_Z = 4, CC_E = 4, - CC_NZ = 5, CC_NE = 5, - CC_BE = 6, CC_NA = 6, - CC_NBE = 7, CC_A = 7, - CC_S = 8, - CC_NS = 9, - CC_P = 0xA, CC_PE = 0xA, - CC_NP = 0xB, CC_PO = 0xB, - CC_L = 0xC, CC_NGE = 0xC, - CC_NL = 0xD, CC_GE = 0xD, - CC_LE = 0xE, CC_NG = 0xE, - CC_NLE = 0xF, CC_G = 0xF +enum CCFlags { + CC_O = 0, + CC_NO = 1, + CC_B = 2, + CC_C = 2, + CC_NAE = 2, + CC_NB = 3, + CC_NC = 3, + CC_AE = 3, + CC_Z = 4, + CC_E = 4, + CC_NZ = 5, + CC_NE = 5, + CC_BE = 6, + CC_NA = 6, + CC_NBE = 7, + CC_A = 7, + CC_S = 8, + CC_NS = 9, + CC_P = 0xA, + CC_PE = 0xA, + CC_NP = 0xB, + CC_PO = 0xB, + CC_L = 0xC, + CC_NGE = 0xC, + CC_NL = 0xD, + CC_GE = 0xD, + CC_LE = 0xE, + CC_NG = 0xE, + CC_NLE = 0xF, + CC_G = 0xF }; -enum -{ +enum { NUMGPRs = 16, NUMXMMs = 16, }; -enum -{ +enum { SCALE_NONE = 0, SCALE_1 = 1, SCALE_2 = 2, SCALE_4 = 4, SCALE_8 = 8, SCALE_ATREG = 16, - //SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG + // SCALE_NOBASE_1 is not supported and can be replaced with SCALE_ATREG SCALE_NOBASE_2 = 34, SCALE_NOBASE_4 = 36, SCALE_NOBASE_8 = 40, SCALE_RIP = 0xFF, - SCALE_IMM8 = 0xF0, + SCALE_IMM8 = 0xF0, SCALE_IMM16 = 0xF1, SCALE_IMM32 = 0xF2, SCALE_IMM64 = 0xF3, @@ -114,7 +184,7 @@ enum NormalOp { nrmSUB, nrmSBB, nrmAND, - nrmOR , + nrmOR, nrmXOR, nrmMOV, nrmTEST, @@ -157,68 +227,74 @@ enum FloatRound { class XEmitter; // RIP addressing does not benefit from micro op fusion on Core arch -struct OpArg -{ +struct OpArg { friend class XEmitter; - constexpr OpArg() = default; // dummy op arg, used for storage + constexpr OpArg() = default; // dummy op arg, used for storage constexpr OpArg(u64 offset_, int scale_, X64Reg rmReg = RAX, X64Reg scaledReg = RAX) - : scale(static_cast<u8>(scale_)) - , offsetOrBaseReg(static_cast<u16>(rmReg)) - , indexReg(static_cast<u16>(scaledReg)) - , offset(offset_) - { + : scale(static_cast<u8>(scale_)), offsetOrBaseReg(static_cast<u16>(rmReg)), + indexReg(static_cast<u16>(scaledReg)), offset(offset_) { } - constexpr bool operator==(const OpArg &b) const - { - return operandReg == b.operandReg && - scale == b.scale && - offsetOrBaseReg == b.offsetOrBaseReg && - indexReg == b.indexReg && - offset == b.offset; + constexpr bool operator==(const OpArg& b) const { + return operandReg == b.operandReg && scale == b.scale && + offsetOrBaseReg == b.offsetOrBaseReg && indexReg == b.indexReg && offset == b.offset; } - void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; - void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; - void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; - void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); - void WriteNormalOp(XEmitter *emit, bool toRM, NormalOp op, const OpArg &operand, int bits) const; - - constexpr bool IsImm() const { return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || scale == SCALE_IMM64; } - constexpr bool IsSimpleReg() const { return scale == SCALE_NONE; } - constexpr bool IsSimpleReg(X64Reg reg) const - { + void WriteRex(XEmitter* emit, int opBits, int bits, int customOp = -1) const; + void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, + int W = 0) const; + void WriteRest(XEmitter* emit, int extraBytes = 0, X64Reg operandReg = INVALID_REG, + bool warn_64bit_offset = true) const; + void WriteSingleByteOp(XEmitter* emit, u8 op, X64Reg operandReg, int bits); + void WriteNormalOp(XEmitter* emit, bool toRM, NormalOp op, const OpArg& operand, + int bits) const; + + constexpr bool IsImm() const { + return scale == SCALE_IMM8 || scale == SCALE_IMM16 || scale == SCALE_IMM32 || + scale == SCALE_IMM64; + } + constexpr bool IsSimpleReg() const { + return scale == SCALE_NONE; + } + constexpr bool IsSimpleReg(X64Reg reg) const { return IsSimpleReg() && GetSimpleReg() == reg; } - int GetImmBits() const - { - switch (scale) - { - case SCALE_IMM8: return 8; - case SCALE_IMM16: return 16; - case SCALE_IMM32: return 32; - case SCALE_IMM64: return 64; - default: return -1; + int GetImmBits() const { + switch (scale) { + case SCALE_IMM8: + return 8; + case SCALE_IMM16: + return 16; + case SCALE_IMM32: + return 32; + case SCALE_IMM64: + return 64; + default: + return -1; } } void SetImmBits(int bits) { - switch (bits) - { - case 8: scale = SCALE_IMM8; break; - case 16: scale = SCALE_IMM16; break; - case 32: scale = SCALE_IMM32; break; - case 64: scale = SCALE_IMM64; break; + switch (bits) { + case 8: + scale = SCALE_IMM8; + break; + case 16: + scale = SCALE_IMM16; + break; + case 32: + scale = SCALE_IMM32; + break; + case 64: + scale = SCALE_IMM64; + break; } } - constexpr X64Reg GetSimpleReg() const - { - return scale == SCALE_NONE - ? static_cast<X64Reg>(offsetOrBaseReg) - : INVALID_REG; + constexpr X64Reg GetSimpleReg() const { + return scale == SCALE_NONE ? static_cast<X64Reg>(offsetOrBaseReg) : INVALID_REG; } constexpr u32 GetImmValue() const { @@ -234,41 +310,50 @@ private: u8 scale = 0; u16 offsetOrBaseReg = 0; u16 indexReg = 0; - u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available. + u64 offset = 0; // use RIP-relative as much as possible - 64-bit immediates are not available. u16 operandReg = 0; }; template <typename T> -inline OpArg M(const T *ptr) { return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); } -constexpr OpArg R(X64Reg value) { return OpArg(0, SCALE_NONE, value); } -constexpr OpArg MatR(X64Reg value) { return OpArg(0, SCALE_ATREG, value); } +inline OpArg M(const T* ptr) { + return OpArg(reinterpret_cast<u64>(ptr), static_cast<int>(SCALE_RIP)); +} +constexpr OpArg R(X64Reg value) { + return OpArg(0, SCALE_NONE, value); +} +constexpr OpArg MatR(X64Reg value) { + return OpArg(0, SCALE_ATREG, value); +} -constexpr OpArg MDisp(X64Reg value, int offset) -{ +constexpr OpArg MDisp(X64Reg value, int offset) { return OpArg(static_cast<u32>(offset), SCALE_ATREG, value); } -constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) -{ +constexpr OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset) { return OpArg(offset, scale, base, scaled); } -constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) -{ - return scale == SCALE_1 - ? OpArg(offset, SCALE_ATREG, scaled) - : OpArg(offset, scale | 0x20, RAX, scaled); +constexpr OpArg MScaled(X64Reg scaled, int scale, int offset) { + return scale == SCALE_1 ? OpArg(offset, SCALE_ATREG, scaled) + : OpArg(offset, scale | 0x20, RAX, scaled); } -constexpr OpArg MRegSum(X64Reg base, X64Reg offset) -{ +constexpr OpArg MRegSum(X64Reg base, X64Reg offset) { return MComplex(base, offset, 1, 0); } -constexpr OpArg Imm8 (u8 imm) { return OpArg(imm, SCALE_IMM8); } -constexpr OpArg Imm16(u16 imm) { return OpArg(imm, SCALE_IMM16); } //rarely used -constexpr OpArg Imm32(u32 imm) { return OpArg(imm, SCALE_IMM32); } -constexpr OpArg Imm64(u64 imm) { return OpArg(imm, SCALE_IMM64); } +constexpr OpArg Imm8(u8 imm) { + return OpArg(imm, SCALE_IMM8); +} +constexpr OpArg Imm16(u16 imm) { + return OpArg(imm, SCALE_IMM16); +} // rarely used +constexpr OpArg Imm32(u32 imm) { + return OpArg(imm, SCALE_IMM32); +} +constexpr OpArg Imm64(u64 imm) { + return OpArg(imm, SCALE_IMM64); +} constexpr OpArg UImmAuto(u32 imm) { return OpArg(imm, imm >= 128 ? SCALE_IMM32 : SCALE_IMM8); } @@ -277,8 +362,7 @@ constexpr OpArg SImmAuto(s32 imm) { } template <typename T> -OpArg ImmPtr(const T* imm) -{ +OpArg ImmPtr(const T* imm) { #ifdef _ARCH_64 return Imm64(reinterpret_cast<u64>(imm)); #else @@ -286,36 +370,31 @@ OpArg ImmPtr(const T* imm) #endif } -inline u32 PtrOffset(const void* ptr, const void* base) -{ +inline u32 PtrOffset(const void* ptr, const void* base) { #ifdef _ARCH_64 - s64 distance = (s64)ptr-(s64)base; - if (distance >= 0x80000000LL || - distance < -0x80000000LL) - { + s64 distance = (s64)ptr - (s64)base; + if (distance >= 0x80000000LL || distance < -0x80000000LL) { ASSERT_MSG(0, "pointer offset out of range"); return 0; } return (u32)distance; #else - return (u32)ptr-(u32)base; + return (u32)ptr - (u32)base; #endif } -//usage: int a[]; ARRAY_OFFSET(a,10) -#define ARRAY_OFFSET(array,index) ((u32)((u64)&(array)[index]-(u64)&(array)[0])) -//usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFFSET(str,elem) ((u32)((u64)&(str).elem-(u64)&(str))) +// usage: int a[]; ARRAY_OFFSET(a,10) +#define ARRAY_OFFSET(array, index) ((u32)((u64) & (array)[index] - (u64) & (array)[0])) +// usage: struct {int e;} s; STRUCT_OFFSET(s,e) +#define STRUCT_OFFSET(str, elem) ((u32)((u64) & (str).elem - (u64) & (str))) -struct FixupBranch -{ - u8 *ptr; - int type; //0 = 8bit 1 = 32bit +struct FixupBranch { + u8* ptr; + int type; // 0 = 8bit 1 = 32bit }; -enum SSECompare -{ +enum SSECompare { EQ = 0, LT, LE, @@ -326,11 +405,10 @@ enum SSECompare ORD, }; -class XEmitter -{ - friend struct OpArg; // for Write8 etc +class XEmitter { + friend struct OpArg; // for Write8 etc private: - u8 *code; + u8* code; bool flags_locked; void CheckFlags(); @@ -347,14 +425,19 @@ private: void WriteSSSE3Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); void WriteSSE41Op(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp, const OpArg& arg, int extrabytes = 0); - void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); - void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); - void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); - void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, int extrabytes = 0); + void WriteAVXOp(u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, + int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, + int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, + int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, const OpArg& arg, + int extrabytes = 0); void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, const OpArg& arg); - void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); + void WriteNormalOp(XEmitter* emit, int bits, NormalOp op, const OpArg& a1, const OpArg& a2); - void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); + void ABI_CalculateFrameSize(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size, + size_t* shadowp, size_t* subtractionp, size_t* xmm_offsetp); protected: void Write8(u8 value); @@ -363,26 +446,38 @@ protected: void Write64(u64 value); public: - XEmitter() { code = nullptr; flags_locked = false; } - XEmitter(u8 *code_ptr) { code = code_ptr; flags_locked = false; } - virtual ~XEmitter() {} + XEmitter() { + code = nullptr; + flags_locked = false; + } + XEmitter(u8* code_ptr) { + code = code_ptr; + flags_locked = false; + } + virtual ~XEmitter() { + } void WriteModRM(int mod, int rm, int reg); void WriteSIB(int scale, int index, int base); - void SetCodePtr(u8 *ptr); + void SetCodePtr(u8* ptr); void ReserveCodeSpace(int bytes); - const u8 *AlignCode4(); - const u8 *AlignCode16(); - const u8 *AlignCodePage(); - const u8 *GetCodePtr() const; - u8 *GetWritableCodePtr(); - - void LockFlags() { flags_locked = true; } - void UnlockFlags() { flags_locked = false; } + const u8* AlignCode4(); + const u8* AlignCode16(); + const u8* AlignCodePage(); + const u8* GetCodePtr() const; + u8* GetWritableCodePtr(); + + void LockFlags() { + flags_locked = true; + } + void UnlockFlags() { + flags_locked = false; + } // Looking for one of these? It's BANNED!! Some instructions are slow on modern CPU - // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other string instr., + // INC, DEC, LOOP, LOOPNE, LOOPE, ENTER, LEAVE, XCHG, XLAT, REP MOVSB/MOVSD, REP SCASD + other + // string instr., // INC and DEC are slow on Intel Core, but not on AMD. They create a // false flag dependency because they only update a subset of the flags. // XCHG is SLOW and should be avoided. @@ -401,11 +496,11 @@ public: void CLC(); void CMC(); - // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and AMD! + // These two can not be executed in 64-bit mode on early Intel 64-bit CPU:s, only on Core2 and + // AMD! void LAHF(); // 3 cycle vector path void SAHF(); // direct path fast - // Stack control void PUSH(X64Reg reg); void POP(X64Reg reg); @@ -422,7 +517,7 @@ public: void JMP(const u8* addr, bool force5Bytes = false); void JMPptr(const OpArg& arg); - void JMPself(); //infinite loop! + void JMPself(); // infinite loop! #ifdef CALL #undef CALL #endif @@ -450,12 +545,11 @@ public: void BSR(int bits, X64Reg dest, const OpArg& src); // Top bit to bottom bit // Cache control - enum PrefetchLevel - { - PF_NTA, //Non-temporal (data used once and only once) - PF_T0, //All cache levels - PF_T1, //Levels 2+ (aliased to T0 on AMD) - PF_T2, //Levels 3+ (aliased to T0 on AMD) + enum PrefetchLevel { + PF_NTA, // Non-temporal (data used once and only once) + PF_T0, // All cache levels + PF_T1, // Levels 2+ (aliased to T0 on AMD) + PF_T2, // Levels 3+ (aliased to T0 on AMD) }; void PREFETCH(PrefetchLevel level, OpArg arg); void MOVNTI(int bits, const OpArg& dest, X64Reg src); @@ -464,8 +558,8 @@ public: void MOVNTPD(const OpArg& arg, X64Reg regOp); // Multiplication / division - void MUL(int bits, const OpArg& src); //UNSIGNED - void IMUL(int bits, const OpArg& src); //SIGNED + void MUL(int bits, const OpArg& src); // UNSIGNED + void IMUL(int bits, const OpArg& src); // SIGNED void IMUL(int bits, X64Reg regOp, const OpArg& src); void IMUL(int bits, X64Reg regOp, const OpArg& src, const OpArg& imm); void DIV(int bits, const OpArg& src); @@ -492,11 +586,19 @@ public: // Extend EAX into EDX in various ways void CWD(int bits = 16); - void CDQ() {CWD(32);} - void CQO() {CWD(64);} + void CDQ() { + CWD(32); + } + void CQO() { + CWD(64); + } void CBW(int bits = 8); - void CWDE() {CBW(16);} - void CDQE() {CBW(32);} + void CWDE() { + CBW(16); + } + void CDQE() { + CBW(32); + } // Load effective address void LEA(int bits, X64Reg dest, OpArg src); @@ -511,7 +613,7 @@ public: void CMP(int bits, const OpArg& a1, const OpArg& a2); // Bit operations - void NOT (int bits, const OpArg& src); + void NOT(int bits, const OpArg& src); void OR(int bits, const OpArg& a1, const OpArg& a2); void XOR(int bits, const OpArg& a1, const OpArg& a2); void MOV(int bits, const OpArg& a1, const OpArg& a2); @@ -525,7 +627,8 @@ public: void BSWAP(int bits, X64Reg reg); // Sign/zero extension - void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src); //automatically uses MOVSXD if necessary + void MOVSX(int dbits, int sbits, X64Reg dest, + OpArg src); // automatically uses MOVSXD if necessary void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src); // Available only on Atom or >= Haswell so far. Test with GetCPUCaps().movbe. @@ -593,13 +696,27 @@ public: void CMPSS(X64Reg regOp, const OpArg& arg, u8 compare); void CMPSD(X64Reg regOp, const OpArg& arg, u8 compare); - void CMPEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_EQ); } - void CMPLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LT); } - void CMPLESS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_LE); } - void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_UNORD); } - void CMPNEQSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NEQ); } - void CMPNLTSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_NLT); } - void CMPORDSS(X64Reg regOp, const OpArg& arg) { CMPSS(regOp, arg, CMP_ORD); } + void CMPEQSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_EQ); + } + void CMPLTSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_LT); + } + void CMPLESS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_LE); + } + void CMPUNORDSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_UNORD); + } + void CMPNEQSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_NEQ); + } + void CMPNLTSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_NLT); + } + void CMPORDSS(X64Reg regOp, const OpArg& arg) { + CMPSS(regOp, arg, CMP_ORD); + } // SSE/SSE2: Floating point packed arithmetic (x4 for float, x2 for double) void ADDPS(X64Reg regOp, const OpArg& arg); @@ -638,10 +755,12 @@ public: // SSE/SSE2: Useful alternative to shuffle in some cases. void MOVDDUP(X64Reg regOp, const OpArg& arg); - // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. + // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily + // on Ivy. void HADDPS(X64Reg dest, const OpArg& src); - // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". + // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg + // contains both a read mask and a write "mask". void DPPS(X64Reg dest, const OpArg& src, u8 arg); void UNPCKLPS(X64Reg dest, const OpArg& src); @@ -694,11 +813,13 @@ public: void MOVD_xmm(const OpArg& arg, X64Reg src); void MOVQ_xmm(OpArg arg, X64Reg src); - // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in question. + // SSE/SSE2: Generates a mask from the high bits of the components of the packed register in + // question. void MOVMSKPS(X64Reg dest, const OpArg& arg); void MOVMSKPD(X64Reg dest, const OpArg& arg); - // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a weird one. + // SSE2: Selective byte store, mask in src register. EDI/RDI specifies store address. This is a + // weird one. void MASKMOVDQU(X64Reg dest, X64Reg src); void LDDQU(X64Reg dest, const OpArg& src); @@ -729,10 +850,10 @@ public: void PACKUSDW(X64Reg dest, const OpArg& arg); void PACKUSWB(X64Reg dest, const OpArg& arg); - void PUNPCKLBW(X64Reg dest, const OpArg &arg); - void PUNPCKLWD(X64Reg dest, const OpArg &arg); - void PUNPCKLDQ(X64Reg dest, const OpArg &arg); - void PUNPCKLQDQ(X64Reg dest, const OpArg &arg); + void PUNPCKLBW(X64Reg dest, const OpArg& arg); + void PUNPCKLWD(X64Reg dest, const OpArg& arg); + void PUNPCKLDQ(X64Reg dest, const OpArg& arg); + void PUNPCKLQDQ(X64Reg dest, const OpArg& arg); void PTEST(X64Reg dest, const OpArg& arg); void PAND(X64Reg dest, const OpArg& arg); @@ -839,25 +960,57 @@ public: void ROUNDPS(X64Reg dest, const OpArg& arg, u8 mode); void ROUNDPD(X64Reg dest, const OpArg& arg, u8 mode); - void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_NEAREST); } - void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_FLOOR); } - void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_CEIL); } - void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { ROUNDSS(dest, arg, FROUND_ZERO); } + void ROUNDNEARSS(X64Reg dest, const OpArg& arg) { + ROUNDSS(dest, arg, FROUND_NEAREST); + } + void ROUNDFLOORSS(X64Reg dest, const OpArg& arg) { + ROUNDSS(dest, arg, FROUND_FLOOR); + } + void ROUNDCEILSS(X64Reg dest, const OpArg& arg) { + ROUNDSS(dest, arg, FROUND_CEIL); + } + void ROUNDZEROSS(X64Reg dest, const OpArg& arg) { + ROUNDSS(dest, arg, FROUND_ZERO); + } - void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_NEAREST); } - void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_FLOOR); } - void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_CEIL); } - void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { ROUNDSD(dest, arg, FROUND_ZERO); } + void ROUNDNEARSD(X64Reg dest, const OpArg& arg) { + ROUNDSD(dest, arg, FROUND_NEAREST); + } + void ROUNDFLOORSD(X64Reg dest, const OpArg& arg) { + ROUNDSD(dest, arg, FROUND_FLOOR); + } + void ROUNDCEILSD(X64Reg dest, const OpArg& arg) { + ROUNDSD(dest, arg, FROUND_CEIL); + } + void ROUNDZEROSD(X64Reg dest, const OpArg& arg) { + ROUNDSD(dest, arg, FROUND_ZERO); + } - void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_NEAREST); } - void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_FLOOR); } - void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_CEIL); } - void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { ROUNDPS(dest, arg, FROUND_ZERO); } + void ROUNDNEARPS(X64Reg dest, const OpArg& arg) { + ROUNDPS(dest, arg, FROUND_NEAREST); + } + void ROUNDFLOORPS(X64Reg dest, const OpArg& arg) { + ROUNDPS(dest, arg, FROUND_FLOOR); + } + void ROUNDCEILPS(X64Reg dest, const OpArg& arg) { + ROUNDPS(dest, arg, FROUND_CEIL); + } + void ROUNDZEROPS(X64Reg dest, const OpArg& arg) { + ROUNDPS(dest, arg, FROUND_ZERO); + } - void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_NEAREST); } - void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_FLOOR); } - void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_CEIL); } - void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { ROUNDPD(dest, arg, FROUND_ZERO); } + void ROUNDNEARPD(X64Reg dest, const OpArg& arg) { + ROUNDPD(dest, arg, FROUND_NEAREST); + } + void ROUNDFLOORPD(X64Reg dest, const OpArg& arg) { + ROUNDPD(dest, arg, FROUND_FLOOR); + } + void ROUNDCEILPD(X64Reg dest, const OpArg& arg) { + ROUNDPD(dest, arg, FROUND_CEIL); + } + void ROUNDZEROPD(X64Reg dest, const OpArg& arg) { + ROUNDPD(dest, arg, FROUND_ZERO); + } // AVX void VADDSD(X64Reg regOp1, X64Reg regOp2, const OpArg& arg); @@ -981,7 +1134,6 @@ public: void ABI_CallFunctionC16(const void* func, u16 param1); void ABI_CallFunctionCC16(const void* func, u32 param1, u16 param2); - // These only support u32 parameters, but that's enough for a lot of uses. // These will destroy the 1 or 2 first "parameter regs". void ABI_CallFunctionC(const void* func, u32 param1); @@ -1012,29 +1164,38 @@ public: * * @param mask Registers to push on the stack (high 16 bits are XMMs, low 16 bits are GPRs) * @param rsp_alignment Current alignment of the stack pointer, must be 0 or 8 - * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the stack + * @param needed_frame_size Additional space needed, e.g., for function arguments passed on the + * stack * @return Size of the shadow space, i.e., offset of the frame */ - size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); + size_t ABI_PushRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, + size_t needed_frame_size = 0); /** - * Restores specified registers and adjusts the stack to its original alignment, i.e., the alignment before + * Restores specified registers and adjusts the stack to its original alignment, i.e., the + * alignment before * the matching PushRegistersAndAdjustStack. * - * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are GPRs) - * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must be 0 or 8 + * @param mask Registers to restores from the stack (high 16 bits are XMMs, low 16 bits are + * GPRs) + * @param rsp_alignment Original alignment before the matching PushRegistersAndAdjustStack, must + * be 0 or 8 * @param needed_frame_size Additional space that was needed * @warning Stack must be currently 16-byte aligned */ - void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, size_t needed_frame_size = 0); - - #ifdef _M_IX86 - static int ABI_GetNumXMMRegs() { return 8; } - #else - static int ABI_GetNumXMMRegs() { return 16; } - #endif -}; // class XEmitter + void ABI_PopRegistersAndAdjustStack(BitSet32 mask, size_t rsp_alignment, + size_t needed_frame_size = 0); +#ifdef _M_IX86 + static int ABI_GetNumXMMRegs() { + return 8; + } +#else + static int ABI_GetNumXMMRegs() { + return 16; + } +#endif +}; // class XEmitter // Everything that needs to generate X86 code should inherit from this. // You get memory management for free, plus, you can use all the MOV etc functions without @@ -1045,4 +1206,4 @@ public: void PoisonMemory() override; }; -} // namespace +} // namespace |