////////////////////////////////////////////////////////////////////////////// // // Core Detours Functionality (detours.cpp of detours.lib) // // Microsoft Research Detours Package, Version 4.0.1 // // Copyright (c) Microsoft Corporation. All rights reserved. // //#define DETOUR_DEBUG 1 #define DETOURS_INTERNAL #include "detours.h" #if DETOURS_VERSION != 0x4c0c1 // 0xMAJORcMINORcPATCH #error detours.h version mismatch #endif #define NOTHROW ////////////////////////////////////////////////////////////////////////////// // #ifdef _DEBUG extern "C" IMAGE_DOS_HEADER __ImageBase; int Detour_AssertExprWithFunctionName(int reportType, const char* filename, int linenumber, const char* FunctionName, const char* msg) { int nRet = 0; DWORD dwLastError = GetLastError(); CHAR szModuleNameWithFunctionName[MAX_PATH * 2]; szModuleNameWithFunctionName[0] = 0; GetModuleFileNameA((HMODULE)&__ImageBase, szModuleNameWithFunctionName, ARRAYSIZE(szModuleNameWithFunctionName)); StringCchCatNA(szModuleNameWithFunctionName, ARRAYSIZE(szModuleNameWithFunctionName), ",", ARRAYSIZE(szModuleNameWithFunctionName) - strlen(szModuleNameWithFunctionName) - 1); StringCchCatNA(szModuleNameWithFunctionName, ARRAYSIZE(szModuleNameWithFunctionName), FunctionName, ARRAYSIZE(szModuleNameWithFunctionName) - strlen(szModuleNameWithFunctionName) - 1); SetLastError(dwLastError); nRet = _CrtDbgReport(reportType, filename, linenumber, szModuleNameWithFunctionName, msg); SetLastError(dwLastError); return nRet; } #endif// _DEBUG ////////////////////////////////////////////////////////////////////////////// // struct _DETOUR_ALIGN { BYTE obTarget : 3; BYTE obTrampoline : 5; }; C_ASSERT(sizeof(_DETOUR_ALIGN) == 1); ////////////////////////////////////////////////////////////////////////////// // // Region reserved for system DLLs, which cannot be used for trampolines. 
// static PVOID s_pSystemRegionLowerBound = (PVOID)(ULONG_PTR)0x70000000; static PVOID s_pSystemRegionUpperBound = (PVOID)(ULONG_PTR)0x80000000; ////////////////////////////////////////////////////////////////////////////// // static bool detour_is_imported(PBYTE pbCode, PBYTE pbAddress) { MEMORY_BASIC_INFORMATION mbi; VirtualQuery((PVOID)pbCode, &mbi, sizeof(mbi)); __try { PIMAGE_DOS_HEADER pDosHeader = (PIMAGE_DOS_HEADER)mbi.AllocationBase; if (pDosHeader->e_magic != IMAGE_DOS_SIGNATURE) { return false; } PIMAGE_NT_HEADERS pNtHeader = (PIMAGE_NT_HEADERS)((PBYTE)pDosHeader + pDosHeader->e_lfanew); if (pNtHeader->Signature != IMAGE_NT_SIGNATURE) { return false; } if (pbAddress >= ((PBYTE)pDosHeader + pNtHeader->OptionalHeader .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress) && pbAddress < ((PBYTE)pDosHeader + pNtHeader->OptionalHeader .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].VirtualAddress + pNtHeader->OptionalHeader .DataDirectory[IMAGE_DIRECTORY_ENTRY_IAT].Size)) { return true; } } #pragma prefast(suppress:28940, "A bad pointer means this probably isn't a PE header.") __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { return false; } return false; } inline ULONG_PTR detour_2gb_below(ULONG_PTR address) { return (address > (ULONG_PTR)0x7ff80000) ? address - 0x7ff80000 : 0x80000; } inline ULONG_PTR detour_2gb_above(ULONG_PTR address) { #if defined(DETOURS_64BIT) return (address < (ULONG_PTR)0xffffffff80000000) ? address + 0x7ff80000 : (ULONG_PTR)0xfffffffffff80000; #else return (address < (ULONG_PTR)0x80000000) ? address + 0x7ff80000 : (ULONG_PTR)0xfff80000; #endif } ///////////////////////////////////////////////////////////////////////// X86. // #ifdef DETOURS_X86 struct _DETOUR_TRAMPOLINE { BYTE rbCode[30]; // target code + jmp to pbRemain BYTE cbCode; // size of moved target code. BYTE cbCodeBreak; // padding to make debugging easier. BYTE rbRestore[22]; // original target code. 
BYTE cbRestore; // size of original target code. BYTE cbRestoreBreak; // padding to make debugging easier. _DETOUR_ALIGN rAlign[8]; // instruction alignment array. PBYTE pbRemain; // first instruction after moved code. [free list] PBYTE pbDetour; // first instruction of detour function. }; C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 72); enum { SIZE_OF_JMP = 5 }; inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal) { PBYTE pbJmpSrc = pbCode + 5; *pbCode++ = 0xE9; // jmp +imm32 *((INT32*&)pbCode)++ = (INT32)(pbJmpVal - pbJmpSrc); return pbCode; } inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal) { *pbCode++ = 0xff; // jmp [+imm32] *pbCode++ = 0x25; *((INT32*&)pbCode)++ = (INT32)((PBYTE)ppbJmpVal); return pbCode; } inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit) { while (pbCode < pbLimit) { *pbCode++ = 0xcc; // brk; } return pbCode; } inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals) { if (pbCode == NULL) { return NULL; } if (ppGlobals != NULL) { *ppGlobals = NULL; } // First, skip over the import vector if there is one. if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32] // Looks like an import alias jump, then get the code it points to. PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2]; if (detour_is_imported(pbCode, pbTarget)) { PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget; DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew)); pbCode = pbNew; } } // Then, skip over a patch jump if (pbCode[0] == 0xeb) { // jmp +imm8 PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1]; DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew)); pbCode = pbNew; // First, skip over the import vector if there is one. if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [imm32] // Looks like an import alias jump, then get the code it points to. 
PBYTE pbTarget = *(UNALIGNED PBYTE *)&pbCode[2]; if (detour_is_imported(pbCode, pbTarget)) { pbNew = *(UNALIGNED PBYTE *)pbTarget; DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew)); pbCode = pbNew; } } // Finally, skip over a long jump if it is the target of the patch jump. else if (pbCode[0] == 0xe9) { // jmp +imm32 pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1]; DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew)); pbCode = pbNew; } } return pbCode; } inline void detour_find_jmp_bounds(PBYTE pbCode, PDETOUR_TRAMPOLINE *ppLower, PDETOUR_TRAMPOLINE *ppUpper) { // We have to place trampolines within +/- 2GB of code. ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode); ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode); DETOUR_TRACE(("[%p..%p..%p]\n", (PVOID)lo, pbCode, (PVOID)hi)); // And, within +/- 2GB of relative jmp targets. if (pbCode[0] == 0xe9) { // jmp +imm32 PBYTE pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1]; if (pbNew < pbCode) { hi = detour_2gb_above((ULONG_PTR)pbNew); } else { lo = detour_2gb_below((ULONG_PTR)pbNew); } DETOUR_TRACE(("[%p..%p..%p] +imm32\n", (PVOID)lo, pbCode, (PVOID)hi)); } *ppLower = (PDETOUR_TRAMPOLINE)lo; *ppUpper = (PDETOUR_TRAMPOLINE)hi; } inline BOOL detour_does_code_end_function(PBYTE pbCode) { if (pbCode[0] == 0xeb || // jmp +imm8 pbCode[0] == 0xe9 || // jmp +imm32 pbCode[0] == 0xe0 || // jmp eax pbCode[0] == 0xc2 || // ret +imm8 pbCode[0] == 0xc3 || // ret pbCode[0] == 0xcc) { // brk return TRUE; } else if (pbCode[0] == 0xf3 && pbCode[1] == 0xc3) { // rep ret return TRUE; } else if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32] return TRUE; } else if ((pbCode[0] == 0x26 || // jmp es: pbCode[0] == 0x2e || // jmp cs: pbCode[0] == 0x36 || // jmp ss: pbCode[0] == 0x3e || // jmp ds: pbCode[0] == 0x64 || // jmp fs: pbCode[0] == 0x65) && // jmp gs: pbCode[1] == 0xff && // jmp [+imm32] pbCode[2] == 0x25) { return TRUE; } return FALSE; } inline ULONG detour_is_code_filler(PBYTE 
pbCode) { // 1-byte through 11-byte NOPs. if (pbCode[0] == 0x90) { return 1; } if (pbCode[0] == 0x66 && pbCode[1] == 0x90) { return 2; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x00) { return 3; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x40 && pbCode[3] == 0x00) { return 4; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x44 && pbCode[3] == 0x00 && pbCode[4] == 0x00) { return 5; } if (pbCode[0] == 0x66 && pbCode[1] == 0x0F && pbCode[2] == 0x1F && pbCode[3] == 0x44 && pbCode[4] == 0x00 && pbCode[5] == 0x00) { return 6; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x80 && pbCode[3] == 0x00 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00) { return 7; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x84 && pbCode[3] == 0x00 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00) { return 8; } if (pbCode[0] == 0x66 && pbCode[1] == 0x0F && pbCode[2] == 0x1F && pbCode[3] == 0x84 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00) { return 9; } if (pbCode[0] == 0x66 && pbCode[1] == 0x66 && pbCode[2] == 0x0F && pbCode[3] == 0x1F && pbCode[4] == 0x84 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00 && pbCode[9] == 0x00) { return 10; } if (pbCode[0] == 0x66 && pbCode[1] == 0x66 && pbCode[2] == 0x66 && pbCode[3] == 0x0F && pbCode[4] == 0x1F && pbCode[5] == 0x84 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00 && pbCode[9] == 0x00 && pbCode[10] == 0x00) { return 11; } // int 3. if (pbCode[0] == 0xcc) { return 1; } return 0; } #endif // DETOURS_X86 ///////////////////////////////////////////////////////////////////////// X64. // #ifdef DETOURS_X64 struct _DETOUR_TRAMPOLINE { // An X64 instuction can be 15 bytes long. // In practice 11 seems to be the limit. BYTE rbCode[30]; // target code + jmp to pbRemain. BYTE cbCode; // size of moved target code. 
BYTE cbCodeBreak; // padding to make debugging easier. BYTE rbRestore[30]; // original target code. BYTE cbRestore; // size of original target code. BYTE cbRestoreBreak; // padding to make debugging easier. _DETOUR_ALIGN rAlign[8]; // instruction alignment array. PBYTE pbRemain; // first instruction after moved code. [free list] PBYTE pbDetour; // first instruction of detour function. BYTE rbCodeIn[8]; // jmp [pbDetour] }; C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 96); enum { SIZE_OF_JMP = 5 }; inline PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE pbJmpVal) { PBYTE pbJmpSrc = pbCode + 5; *pbCode++ = 0xE9; // jmp +imm32 *((INT32*&)pbCode)++ = (INT32)(pbJmpVal - pbJmpSrc); return pbCode; } inline PBYTE detour_gen_jmp_indirect(PBYTE pbCode, PBYTE *ppbJmpVal) { PBYTE pbJmpSrc = pbCode + 6; *pbCode++ = 0xff; // jmp [+imm32] *pbCode++ = 0x25; *((INT32*&)pbCode)++ = (INT32)((PBYTE)ppbJmpVal - pbJmpSrc); return pbCode; } inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit) { while (pbCode < pbLimit) { *pbCode++ = 0xcc; // brk; } return pbCode; } inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals) { if (pbCode == NULL) { return NULL; } if (ppGlobals != NULL) { *ppGlobals = NULL; } // First, skip over the import vector if there is one. if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32] // Looks like an import alias jump, then get the code it points to. PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2]; if (detour_is_imported(pbCode, pbTarget)) { PBYTE pbNew = *(UNALIGNED PBYTE *)pbTarget; DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew)); pbCode = pbNew; } } // Then, skip over a patch jump if (pbCode[0] == 0xeb) { // jmp +imm8 PBYTE pbNew = pbCode + 2 + *(CHAR *)&pbCode[1]; DETOUR_TRACE(("%p->%p: skipped over short jump.\n", pbCode, pbNew)); pbCode = pbNew; // First, skip over the import vector if there is one. 
if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32] // Looks like an import alias jump, then get the code it points to. PBYTE pbTarget = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2]; if (detour_is_imported(pbCode, pbTarget)) { pbNew = *(UNALIGNED PBYTE *)pbTarget; DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew)); pbCode = pbNew; } } // Finally, skip over a long jump if it is the target of the patch jump. else if (pbCode[0] == 0xe9) { // jmp +imm32 pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1]; DETOUR_TRACE(("%p->%p: skipped over long jump.\n", pbCode, pbNew)); pbCode = pbNew; } } return pbCode; } inline void detour_find_jmp_bounds(PBYTE pbCode, PDETOUR_TRAMPOLINE *ppLower, PDETOUR_TRAMPOLINE *ppUpper) { // We have to place trampolines within +/- 2GB of code. ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode); ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode); DETOUR_TRACE(("[%p..%p..%p]\n", (PVOID)lo, pbCode, (PVOID)hi)); // And, within +/- 2GB of relative jmp vectors. if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32] PBYTE pbNew = pbCode + 6 + *(UNALIGNED INT32 *)&pbCode[2]; if (pbNew < pbCode) { hi = detour_2gb_above((ULONG_PTR)pbNew); } else { lo = detour_2gb_below((ULONG_PTR)pbNew); } DETOUR_TRACE(("[%p..%p..%p] [+imm32]\n", (PVOID)lo, pbCode, (PVOID)hi)); } // And, within +/- 2GB of relative jmp targets. 
else if (pbCode[0] == 0xe9) { // jmp +imm32 PBYTE pbNew = pbCode + 5 + *(UNALIGNED INT32 *)&pbCode[1]; if (pbNew < pbCode) { hi = detour_2gb_above((ULONG_PTR)pbNew); } else { lo = detour_2gb_below((ULONG_PTR)pbNew); } DETOUR_TRACE(("[%p..%p..%p] +imm32\n", (PVOID)lo, pbCode, (PVOID)hi)); } *ppLower = (PDETOUR_TRAMPOLINE)lo; *ppUpper = (PDETOUR_TRAMPOLINE)hi; } inline BOOL detour_does_code_end_function(PBYTE pbCode) { if (pbCode[0] == 0xeb || // jmp +imm8 pbCode[0] == 0xe9 || // jmp +imm32 pbCode[0] == 0xe0 || // jmp eax pbCode[0] == 0xc2 || // ret +imm8 pbCode[0] == 0xc3 || // ret pbCode[0] == 0xcc) { // brk return TRUE; } else if (pbCode[0] == 0xf3 && pbCode[1] == 0xc3) { // rep ret return TRUE; } else if (pbCode[0] == 0xff && pbCode[1] == 0x25) { // jmp [+imm32] return TRUE; } else if ((pbCode[0] == 0x26 || // jmp es: pbCode[0] == 0x2e || // jmp cs: pbCode[0] == 0x36 || // jmp ss: pbCode[0] == 0x3e || // jmp ds: pbCode[0] == 0x64 || // jmp fs: pbCode[0] == 0x65) && // jmp gs: pbCode[1] == 0xff && // jmp [+imm32] pbCode[2] == 0x25) { return TRUE; } return FALSE; } inline ULONG detour_is_code_filler(PBYTE pbCode) { // 1-byte through 11-byte NOPs. 
if (pbCode[0] == 0x90) { return 1; } if (pbCode[0] == 0x66 && pbCode[1] == 0x90) { return 2; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x00) { return 3; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x40 && pbCode[3] == 0x00) { return 4; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x44 && pbCode[3] == 0x00 && pbCode[4] == 0x00) { return 5; } if (pbCode[0] == 0x66 && pbCode[1] == 0x0F && pbCode[2] == 0x1F && pbCode[3] == 0x44 && pbCode[4] == 0x00 && pbCode[5] == 0x00) { return 6; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x80 && pbCode[3] == 0x00 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00) { return 7; } if (pbCode[0] == 0x0F && pbCode[1] == 0x1F && pbCode[2] == 0x84 && pbCode[3] == 0x00 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00) { return 8; } if (pbCode[0] == 0x66 && pbCode[1] == 0x0F && pbCode[2] == 0x1F && pbCode[3] == 0x84 && pbCode[4] == 0x00 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00) { return 9; } if (pbCode[0] == 0x66 && pbCode[1] == 0x66 && pbCode[2] == 0x0F && pbCode[3] == 0x1F && pbCode[4] == 0x84 && pbCode[5] == 0x00 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00 && pbCode[9] == 0x00) { return 10; } if (pbCode[0] == 0x66 && pbCode[1] == 0x66 && pbCode[2] == 0x66 && pbCode[3] == 0x0F && pbCode[4] == 0x1F && pbCode[5] == 0x84 && pbCode[6] == 0x00 && pbCode[7] == 0x00 && pbCode[8] == 0x00 && pbCode[9] == 0x00 && pbCode[10] == 0x00) { return 11; } // int 3. if (pbCode[0] == 0xcc) { return 1; } return 0; } #endif // DETOURS_X64 //////////////////////////////////////////////////////////////////////// IA64. // #ifdef DETOURS_IA64 struct _DETOUR_TRAMPOLINE { // On the IA64, a trampoline is used for both incoming and outgoing calls. 
//
    // The trampoline contains the following bundles for the outgoing call:
    //      movl gp=target_gp;
    //      (the moved target bundle, held in rbCode)
    //      brl  target_code;
    //
    // The trampoline contains the following bundles for the incoming call:
    //      alloc  r41=ar.pfs, b, 0, 8, 0
    //      mov    r40=rp
    //
    //      adds   r50=0, r39
    //      adds   r49=0, r38
    //      adds   r48=0, r37 ;;
    //
    //      adds   r47=0, r36
    //      adds   r46=0, r35
    //      adds   r45=0, r34
    //
    //      adds   r44=0, r33
    //      adds   r43=0, r32
    //      adds   r42=0, gp ;;
    //
    //      movl   gp=ffffffff`ffffffff ;;
    //
    //      brl.call.sptk.few rp=disas!TestCodes+20e0 (00000000`00404ea0) ;;
    //
    //      adds   gp=0, r42
    //      mov    rp=r40, +0 ;;
    //      mov.i  ar.pfs=r41
    //
    //      br.ret.sptk.many rp ;;
    //
    // This way, we only have to relocate a single bundle.
    //
    // The complicated incoming trampoline is required because we have to
    // create an additional stack frame so that we save and restore the gp.
    // We must do this because gp is a caller-saved register, but not saved
    // if the caller thinks the target is in the same DLL, which changes
    // when we insert a detour.
    //
    DETOUR_IA64_BUNDLE  bMovlTargetGp;  // Bundle which sets target GP
    BYTE                rbCode[sizeof(DETOUR_IA64_BUNDLE)]; // moved bundle.
    DETOUR_IA64_BUNDLE  bBrlRemainEip;  // Brl to pbRemain
    // This must be adjacent to bBranchIslands.

    // Each instruction in the moved bundle could be a IP-relative chk or branch or call.
    // Any such instructions are changed to point to a brl in bBranchIslands.
    // This must be adjacent to bBrlRemainEip -- see "pbPool".
    DETOUR_IA64_BUNDLE  bBranchIslands[DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE];

    // Target of brl inserted in target function
    DETOUR_IA64_BUNDLE  bAllocFrame;    // alloc frame
    DETOUR_IA64_BUNDLE  bSave37to39;    // save r37, r38, r39.
    DETOUR_IA64_BUNDLE  bSave34to36;    // save r34, r35, r36.
    DETOUR_IA64_BUNDLE  bSaveGPto33;    // save gp, r32, r33.
    DETOUR_IA64_BUNDLE  bMovlDetourGp;  // set detour GP.
    DETOUR_IA64_BUNDLE  bCallDetour;    // call detour.
    DETOUR_IA64_BUNDLE  bPopFrameGp;    // pop frame and restore gp.
    DETOUR_IA64_BUNDLE  bReturn;        // return to caller.

    PLABEL_DESCRIPTOR   pldTrampoline;

    BYTE                rbRestore[sizeof(DETOUR_IA64_BUNDLE)]; // original target bundle.
    BYTE                cbRestore;      // size of original target code.
    BYTE                cbCode;         // size of moved target code.
    _DETOUR_ALIGN       rAlign[14];     // instruction alignment array.
    PBYTE               pbRemain;       // first instruction after moved code. [free list]
    PBYTE               pbDetour;       // first instruction of detour function.
    PPLABEL_DESCRIPTOR  ppldDetour;     // [pbDetour,gpDetour]
    PPLABEL_DESCRIPTOR  ppldTarget;     // [pbTarget,gpDetour]
};

// The layout is relied on elsewhere; pin both the bundle size and the total
// trampoline size at compile time.
C_ASSERT(sizeof(DETOUR_IA64_BUNDLE) == 16);
C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 256 + DETOUR_IA64_INSTRUCTIONS_PER_BUNDLE * 16);

enum {
    SIZE_OF_JMP = sizeof(DETOUR_IA64_BUNDLE)
};

// Resolves an IA64 function pointer to its code address.
//
//  pPointer  - treated as a PLABEL_DESCRIPTOR (entry point + global
//              pointer); may be NULL.
//  ppGlobals - optional; receives the global pointer that goes with the
//              returned code address (NULL when pPointer is NULL).
//
// If the code at the entry point matches the three-bundle local import jump
// pattern below, and the slot it loads from lies in the containing image's
// import table and is non-NULL, the descriptor stored in that slot is
// followed once and its entry point / global pointer are returned instead.
//
// Returns the code address, or NULL if pPointer or its entry point is NULL.
inline PBYTE detour_skip_jmp(PBYTE pPointer, PVOID *ppGlobals)
{
    PBYTE pGlobals = NULL;
    PBYTE pbCode = NULL;

    if (pPointer != NULL) {
        PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)pPointer;
        pbCode = (PBYTE)ppld->EntryPoint;
        pGlobals = (PBYTE)ppld->GlobalPointer;
    }
    if (ppGlobals != NULL) {
        *ppGlobals = pGlobals;
    }
    if (pbCode == NULL) {
        return NULL;
    }

    DETOUR_IA64_BUNDLE *pb = (DETOUR_IA64_BUNDLE *)pbCode;

    // IA64 Local Import Jumps look like:
    //      addl  r2=ffffffff`ffe021c0, gp ;;
    //      ld8   r2=[r2]
    //      nop.i 0 ;;
    //
    //      ld8   r3=[r2], 8 ;;
    //      ld8   gp=[r2]
    //      mov   b6=r3, +0
    //
    //      nop.m 0
    //      nop.i 0
    //      br.cond.sptk.few b6
    //

    // The first word is compared under a mask that ignores the bits the
    // offset is extracted from below; the other five words must match exactly.
    //                     002024000200100b
    if ((pb[0].wide[0] & 0xfffffc000603ffff) == 0x002024000200100b &&
        pb[0].wide[1] == 0x0004000000203008 &&
        pb[1].wide[0] == 0x001014180420180a &&
        pb[1].wide[1] == 0x07000830c0203008 &&
        pb[2].wide[0] == 0x0000000100000010 &&
        pb[2].wide[1] == 0x0080006000000200) {

        // Reassemble the gp-relative offset from its three immediate fields,
        // then extend it with ones above bit 20 when the sign bit is set.
        ULONG64 offset =
            ((pb[0].wide[0] & 0x0000000001fc0000) >> 18) |  // imm7b
            ((pb[0].wide[0] & 0x000001ff00000000) >> 25) |  // imm9d
            ((pb[0].wide[0] & 0x00000000f8000000) >> 11);   // imm5c
        if (pb[0].wide[0] & 0x0000020000000000) {           // sign
            offset |= 0xffffffffffe00000;
        }
        PBYTE pbTarget = pGlobals + offset;
        DETOUR_TRACE(("%p: potential import jump, target=%p\n", pb, pbTarget));

        if (detour_is_imported(pbCode, pbTarget) && *(PBYTE*)pbTarget != NULL) {
            DETOUR_TRACE(("%p: is import jump, label=%p\n", pb, *(PBYTE *)pbTarget));

            // The import slot holds a pointer to another descriptor; report
            // that descriptor's entry point and global pointer instead.
            PPLABEL_DESCRIPTOR ppld = (PPLABEL_DESCRIPTOR)*(PBYTE *)pbTarget;
            pbCode = (PBYTE)ppld->EntryPoint;
            pGlobals = (PBYTE)ppld->GlobalPointer;
            if (ppGlobals != NULL) {
                *ppGlobals = pGlobals;
            }
        }
    }
    return pbCode;
}

// Reports the trampoline placement window for IA64.  pbCode is ignored; the
// same fixed bounds are returned for every target.
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    (void)pbCode;
    *ppLower = (PDETOUR_TRAMPOLINE)(ULONG_PTR)0x0000000000080000;
    *ppUpper = (PDETOUR_TRAMPOLINE)(ULONG_PTR)0xfffffffffff80000;
}

// Always returns FALSE on IA64.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    // Routine not needed on IA64.
    (void)pbCode;
    return FALSE;
}

// Always returns 0 on IA64.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    // Routine not needed on IA64.
    (void)pbCode;
    return 0;
}

#endif // DETOURS_IA64

#ifdef DETOURS_ARM

struct _DETOUR_TRAMPOLINE
{
    // A Thumb-2 instruction can be 2 or 4 bytes long.
    BYTE            rbCode[62];     // target code + jmp to pbRemain
    BYTE            cbCode;         // size of moved target code.
    BYTE            cbCodeBreak;    // padding to make debugging easier.
    BYTE            rbRestore[22];  // original target code.
    BYTE            cbRestore;      // size of original target code.
    BYTE            cbRestoreBreak; // padding to make debugging easier.
    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
    PBYTE           pbRemain;       // first instruction after moved code. [free list]
    PBYTE           pbDetour;       // first instruction of detour function.
};

C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 104);

enum {
    SIZE_OF_JMP = 8
};

// Rounds pValue down to a multiple of 4 by clearing its two low bits.
inline PBYTE align4(PBYTE pValue)
{
    return (PBYTE)(((ULONG)pValue) & ~(ULONG)3u);
}

// Reads one Thumb opcode at pbCode.
// A first halfword below 0xe800 is returned as is.  Otherwise the opcode is
// taken to be two halfwords long and is returned with the first halfword in
// the upper 16 bits and the following halfword in the lower 16 bits.
inline ULONG fetch_thumb_opcode(PBYTE pbCode)
{
    ULONG Opcode = *(UINT16 *)&pbCode[0];
    if (Opcode >= 0xe800) {
        Opcode = (Opcode << 16) | *(UINT16 *)&pbCode[2];
    }
    return Opcode;
}

// Writes one Thumb opcode at pbCode and advances pbCode past it.
// Values of 0x10000 and above are written as two halfwords (upper half
// first); smaller values are written as a single halfword.
inline void write_thumb_opcode(PBYTE &pbCode, ULONG Opcode)
{
    if (Opcode >= 0x10000) {
        *((UINT16*&)pbCode)++ = Opcode >> 16;
    }
    *((UINT16*&)pbCode)++ = (UINT16)Opcode;
}

// Emits a jump to pbJmpVal at pbCode as "LDR PC,[PC+n]" plus a 4-byte
// literal holding the destination.
//
//  pbCode   - where the instruction is written.
//  ppPool   - optional literal pool cursor.  When non-NULL the cursor is
//             moved down by 4 and the literal is stored there.  When NULL
//             the literal is stored inline at align4(pbCode + 6).
//  pbJmpVal - destination; stored after DETOURS_PBYTE_TO_PFUNC conversion.
//
// Returns the address following the emitted code.  In the inline-literal
// case that includes an optional BREAK halfword used as padding and the
// 4 bytes of the literal.
PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
{
    PBYTE pbLiteral;
    if (ppPool != NULL) {
        *ppPool = *ppPool - 4;
        pbLiteral = *ppPool;
    }
    else {
        pbLiteral = align4(pbCode + 6);
    }

    *((PBYTE*&)pbLiteral) = DETOURS_PBYTE_TO_PFUNC(pbJmpVal);
    // The load offset is measured from the 4-byte aligned value of pbCode + 4.
    LONG delta = pbLiteral - align4(pbCode + 4);

    write_thumb_opcode(pbCode, 0xf8dff000 | delta);     // LDR PC,[PC+n]

    if (ppPool == NULL) {
        // If the cursor is only halfword aligned after the LDR, pad with a
        // BREAK so that the cursor reaches the aligned inline literal.
        if (((ULONG)pbCode & 2) != 0) {
            write_thumb_opcode(pbCode, 0xdefe);         // BREAK
        }
        // Step over the literal itself.
        pbCode += 4;
    }
    return pbCode;
}

// Writes 0xdefe halfwords starting at pbCode for as long as pbCode is below
// pbLimit.  Returns the address after the last halfword written.
inline PBYTE detour_gen_brk(PBYTE pbCode, PBYTE pbLimit)
{
    while (pbCode < pbLimit) {
        write_thumb_opcode(pbCode, 0xdefe);
    }
    return pbCode;
}

// Follows an import jump at the start of a function, if there is one.
//
//  pbCode    - function pointer; NULL is passed straight through.  It is
//              converted with DETOURS_PFUNC_TO_PBYTE before being decoded.
//  ppGlobals - optional; set to NULL here when pbCode is non-NULL.
//
// Recognizes the sequence movw r12 / movt r12 / ldr pc,[r12].  When the
// address assembled from the two immediates lies in the containing image's
// import table, returns the pointer stored there (converted with
// DETOURS_PFUNC_TO_PBYTE); otherwise returns the converted pbCode.
inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals)
{
    if (pbCode == NULL) {
        return NULL;
    }
    if (ppGlobals != NULL) {
        *ppGlobals = NULL;
    }

    // Skip over the import jump if there is one.
    pbCode = (PBYTE)DETOURS_PFUNC_TO_PBYTE(pbCode);
    ULONG Opcode = fetch_thumb_opcode(pbCode);

    if ((Opcode & 0xfbf08f00) == 0xf2400c00) {          // movw r12,#xxxx
        ULONG Opcode2 = fetch_thumb_opcode(pbCode+4);

        if ((Opcode2 & 0xfbf08f00) == 0xf2c00c00) {      // movt r12,#xxxx
            ULONG Opcode3 = fetch_thumb_opcode(pbCode+8);
            if (Opcode3 == 0xf8dcf000) {                 // ldr  pc,[r12]
                // Upper 16 address bits come from the movt immediate fields
                // (Opcode2), lower 16 bits from the movw fields (Opcode).
                PBYTE pbTarget = (PBYTE)(((Opcode2 << 12) & 0xf7000000) |
                                         ((Opcode2 <<  1) & 0x08000000) |
                                         ((Opcode2 << 16) & 0x00ff0000) |
                                         ((Opcode  >>  4) & 0x0000f700) |
                                         ((Opcode  >> 15) & 0x00000800) |
                                         ((Opcode  >>  0) & 0x000000ff));
                if (detour_is_imported(pbCode, pbTarget)) {
                    PBYTE pbNew = *(PBYTE *)pbTarget;
                    pbNew = DETOURS_PFUNC_TO_PBYTE(pbNew);
                    DETOUR_TRACE(("%p->%p: skipped over import table.\n", pbCode, pbNew));
                    return pbNew;
                }
            }
        }
    }
    return pbCode;
}

// Computes the address window in which a trampoline for pbCode may be
// placed: [detour_2gb_below(pbCode), detour_2gb_above(pbCode)].
inline void detour_find_jmp_bounds(PBYTE pbCode,
                                   PDETOUR_TRAMPOLINE *ppLower,
                                   PDETOUR_TRAMPOLINE *ppUpper)
{
    // We have to place trampolines within +/- 2GB of code.
    ULONG_PTR lo = detour_2gb_below((ULONG_PTR)pbCode);
    ULONG_PTR hi = detour_2gb_above((ULONG_PTR)pbCode);
    DETOUR_TRACE(("[%p..%p..%p]\n", (PVOID)lo, pbCode, (PVOID)hi));

    *ppLower = (PDETOUR_TRAMPOLINE)lo;
    *ppUpper = (PDETOUR_TRAMPOLINE)hi;
}

// Returns TRUE when the opcode at pbCode matches one of the encodings this
// routine treats as the last instruction of a function, FALSE otherwise.
// Both "pop {...,pc}" forms call __debugbreak() before returning TRUE.
inline BOOL detour_does_code_end_function(PBYTE pbCode)
{
    ULONG Opcode = fetch_thumb_opcode(pbCode);
    if ((Opcode & 0xffffff87) == 0x4700 ||          // bx
        (Opcode & 0xf800d000) == 0xf0009000) {      // b
        return TRUE;
    }
    if ((Opcode & 0xffff8000) == 0xe8bd8000) {      // pop {...,pc}
        __debugbreak();
        return TRUE;
    }
    if ((Opcode & 0xffffff00) == 0x0000bd00) {      // pop {...,pc}
        __debugbreak();
        return TRUE;
    }
    return FALSE;
}

// Returns 2 when the two bytes at pbCode are a nop (00 bf) or zero-filled
// padding (00 00), and 0 otherwise.
inline ULONG detour_is_code_filler(PBYTE pbCode)
{
    if (pbCode[0] == 0x00 && pbCode[1] == 0xbf) { // nop.
        return 2;
    }
    if (pbCode[0] == 0x00 && pbCode[1] == 0x00) { // zero-filled padding.
        return 2;
    }
    return 0;
}

#endif // DETOURS_ARM

#ifdef DETOURS_ARM64

struct _DETOUR_TRAMPOLINE
{
    // An ARM64 instruction is 4 bytes long.
//
    // The overwrite is always composed of 3 instructions (12 bytes) which perform an indirect jump
    // using _DETOUR_TRAMPOLINE::pbDetour as the address holding the target location.
    //
    // Copied instructions can expand.
    //
    // The scheme using MovImmediate can cause an instruction
    // to grow as much as 6 times.
    // That would be Bcc or Tbz with a large address space:
    //   4 instructions to form immediate
    //   inverted tbz/bcc
    //   br
    //
    // An expansion of 4 is not uncommon -- bl/blr and small address space:
    //   3 instructions to form immediate
    //   br or brl
    //
    // A theoretical maximum for rbCode is therefore 4*4*6 + 16 = 112 (another 16 for jmp to pbRemain).
    //
    // With literals, the maximum expansion is 5, including the literals: 4*4*5 + 16 = 96.
    //
    // The number is rounded up to 128. m_rbScratchDst should match this.
    //
    BYTE            rbCode[128];    // target code + jmp to pbRemain
    BYTE            cbCode;         // size of moved target code.
    BYTE            cbCodeBreak[3]; // padding to make debugging easier.
    BYTE            rbRestore[24];  // original target code.
    BYTE            cbRestore;      // size of original target code.
    BYTE            cbRestoreBreak[3]; // padding to make debugging easier.
    _DETOUR_ALIGN   rAlign[8];      // instruction alignment array.
    PBYTE           pbRemain;       // first instruction after moved code. [free list]
    PBYTE           pbDetour;       // first instruction of detour function.
};

C_ASSERT(sizeof(_DETOUR_TRAMPOLINE) == 184);

enum {
    SIZE_OF_JMP = 12
};

// Reads the 32-bit opcode stored at pbCode.
inline ULONG fetch_opcode(PBYTE pbCode)
{
    return *(ULONG *)pbCode;
}

// Stores a 32-bit opcode at pbCode and advances pbCode by 4.
inline void write_opcode(PBYTE &pbCode, ULONG Opcode)
{
    *(ULONG *)pbCode = Opcode;
    pbCode += 4;
}

// Bitfield view of the three 32-bit words written by detour_gen_jmp_indirect:
// an adrp word, an ldr word and a br word.
struct ARM64_INDIRECT_JMP {
    struct {
        ULONG Rd : 5;
        ULONG immhi : 19;
        ULONG iop : 5;
        ULONG immlo : 2;
        ULONG op : 1;
    } ardp;

    struct {
        ULONG Rt : 5;
        ULONG Rn : 5;
        ULONG imm : 12;
        ULONG opc : 2;
        ULONG iop1 : 2;
        ULONG V : 1;
        ULONG iop2 : 3;
        ULONG size : 2;
    } ldr;

    ULONG br;
};

#pragma warning(push)
#pragma warning(disable:4201)

// Splits a 64-bit page delta into the two adrp immediate fields: the low
// 12 bits are skipped, the next 2 bits are immlo and the following 19 bits
// are immhi.
union ARM64_INDIRECT_IMM {
    struct {
        ULONG64 pad : 12;
        ULONG64 adrp_immlo : 2;
        ULONG64 adrp_immhi : 19;
    };

    LONG64 value;
};

#pragma warning(pop)

// Emits a three-instruction indirect jump at pbCode that loads its
// destination from the 64-bit slot pbJmpVal and branches to it through x17.
//
//  pbCode   - where the 12 bytes are written.
//  pbJmpVal - address of the slot holding the destination.  The ldr offset
//             is its low 12 bits divided by 8.
//             NOTE(review): this presumably requires the slot to be 8-byte
//             aligned -- confirm against callers.
//
// Returns the address just past the three instructions.
PBYTE detour_gen_jmp_indirect(BYTE *pbCode, ULONG64 *pbJmpVal)
{
    // adrp x17, [jmpval]
    // ldr x17, [x17, jmpval]
    // br x17

    struct ARM64_INDIRECT_JMP *pIndJmp;
    union ARM64_INDIRECT_IMM jmpIndAddr;

    // Difference between the 4KB page of the slot and the 4KB page of the code.
    jmpIndAddr.value = (((LONG64)pbJmpVal) & 0xFFFFFFFFFFFFF000) -
                       (((LONG64)pbCode) & 0xFFFFFFFFFFFFF000);

    pIndJmp = (struct ARM64_INDIRECT_JMP *)pbCode;
    pbCode = (BYTE *)(pIndJmp + 1);

    pIndJmp->ardp.Rd = 17;
    pIndJmp->ardp.immhi = jmpIndAddr.adrp_immhi;
    pIndJmp->ardp.iop = 0x10;
    pIndJmp->ardp.immlo = jmpIndAddr.adrp_immlo;
    pIndJmp->ardp.op = 1;

    pIndJmp->ldr.Rt = 17;
    pIndJmp->ldr.Rn = 17;
    pIndJmp->ldr.imm = (((ULONG64)pbJmpVal) & 0xFFF) / 8;
    pIndJmp->ldr.opc = 1;
    pIndJmp->ldr.iop1 = 1;
    pIndJmp->ldr.V = 0;
    pIndJmp->ldr.iop2 = 7;
    pIndJmp->ldr.size = 3;

    pIndJmp->br = 0xD61F0220;

    return pbCode;
}

// Emits a jump to pbJmpVal at pbCode as "LDR X17,[PC+n]" followed by
// "BR X17", with the destination stored in an 8-byte literal.
//
//  pbCode   - where the two instructions are written.
//  ppPool   - optional literal pool cursor.  When non-NULL the cursor is
//             moved down by 8 and the literal is stored there.  When NULL
//             the literal is stored inline at pbCode + 8, directly after
//             the two instructions.
//  pbJmpVal - destination address.
//
// Returns the address following the emitted code, including the inline
// literal when there is one.
PBYTE detour_gen_jmp_immediate(PBYTE pbCode, PBYTE *ppPool, PBYTE pbJmpVal)
{
    PBYTE pbLiteral;
    if (ppPool != NULL) {
        *ppPool = *ppPool - 8;
        pbLiteral = *ppPool;
    }
    else {
        pbLiteral = pbCode + 8;
    }

    *((PBYTE*&)pbLiteral) = pbJmpVal;
    // Byte distance from the LDR instruction to its literal; it is encoded
    // below in units of 4 bytes.
    LONG delta = (LONG)(pbLiteral - pbCode);

    write_opcode(pbCode, 0x58000011 | ((delta / 4) << 5));  // LDR X17,[PC+n]
    write_opcode(pbCode, 0xd61f0000 | (17 << 5));           // BR X17

    if (ppPool == NULL) {
        // Step over the inline literal.
        pbCode += 8;
    }
    return pbCode;
}

inline PBYTE detour_gen_brk(PBYTE pbCode,
PBYTE pbLimit) { while (pbCode < pbLimit) { write_opcode(pbCode, 0xd4100000 | (0xf000 << 5)); } return pbCode; } inline INT64 detour_sign_extend(UINT64 value, UINT bits) { const UINT left = 64 - bits; const INT64 m1 = -1; const INT64 wide = (INT64)(value << left); const INT64 sign = (wide < 0) ? (m1 << left) : 0; return value | sign; } inline PBYTE detour_skip_jmp(PBYTE pbCode, PVOID *ppGlobals) { if (pbCode == NULL) { return NULL; } if (ppGlobals != NULL) { *ppGlobals = NULL; } // Skip over the import jump if there is one. pbCode = (PBYTE)pbCode; ULONG Opcode = fetch_opcode(pbCode); if ((Opcode & 0x9f00001f) == 0x90000010) { // adrp x16, IAT ULONG Opcode2 = fetch_opcode(pbCode + 4); if ((Opcode2 & 0xffe003ff) == 0xf9400210) { // ldr x16, [x16, IAT] ULONG Opcode3 = fetch_opcode(pbCode + 8); if (Opcode3 == 0xd61f0200) { // br x16 /* https://static.docs.arm.com/ddi0487/bb/DDI0487B_b_armv8_arm.pdf The ADRP instruction shifts a signed, 21-bit immediate left by 12 bits, adds it to the value of the program counter with the bottom 12 bits cleared to zero, and then writes the result to a general-purpose register. This permits the calculation of the address at a 4KB aligned memory region. In conjunction with an ADD (immediate) instruction, or a Load/Store instruction with a 12-bit immediate offset, this allows for the calculation of, or access to, any address within +/- 4GB of the current PC. PC-rel. addressing This section describes the encoding of the PC-rel. addressing instruction class. The encodings in this section are decoded from Data Processing -- Immediate on page C4-226. Add/subtract (immediate) This section describes the encoding of the Add/subtract (immediate) instruction class. The encodings in this section are decoded from Data Processing -- Immediate on page C4-226. 
Decode fields Instruction page op 0 ADR 1 ADRP C6.2.10 ADRP Form PC-relative address to 4KB page adds an immediate value that is shifted left by 12 bits, to the PC value to form a PC-relative address, with the bottom 12 bits masked out, and writes the result to the destination register. ADRP ,