Changeset 281718 in webkit


Ignore:
Timestamp:
Aug 27, 2021 1:49:34 PM (3 years ago)
Author:
mark.lam@apple.com
Message:

Make ARM64 and X86_64 probe code a little bit more efficient.
https://bugs.webkit.org/show_bug.cgi?id=229618
rdar://82445743

Reviewed by Yusuke Suzuki.

We were using an unnecessary indirect call to call Probe::executeProbe() when we
can be using a direct call, which emits less JIT code. This patch changes the
ARM64 and X86_64 ports to use a direct call now.

Also rename executeProbe to executeJSCJITProbe to make it more unique since we're
switching to extern "C" linkage for this function now.

For MacroAssemblerX86Common.cpp, we left the X86 and MSVC implementations unchanged.
For X86, I don't know the stack alignment requirements (if any) plus we might want
to delete this code eventually since we're not supporting the X86 JIT anymore.
For MSVC, I don't know the way to express a direct call in MSVC assembly, and have
no way to test it. Will leave that as an exercise for folks working on the Windows
ports if they are interested.

Also remove JITProbeExecutorPtrTag since it's no longer needed.

  • assembler/MacroAssemblerARM64.cpp:

(JSC::MacroAssembler::probe):

  • assembler/MacroAssemblerARMv7.cpp:

(JSC::MacroAssembler::probe):

  • assembler/MacroAssemblerMIPS.cpp:

(JSC::MacroAssembler::probe):

  • assembler/MacroAssemblerX86Common.cpp:

(JSC::ctiMasmProbeTrampoline):
(JSC::MacroAssembler::probe):

  • assembler/ProbeContext.cpp:

(JSC::Probe::executeJSCJITProbe):
(JSC::Probe::executeProbe): Deleted.

  • assembler/ProbeContext.h:
  • runtime/JSCPtrTag.h:
Location:
trunk/Source/JavaScriptCore
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r281717 r281718  
     12021-08-27  Mark Lam  <mark.lam@apple.com>
     2
     3        Make ARM64 and X86_64 probe code a little bit more efficient.
     4        https://bugs.webkit.org/show_bug.cgi?id=229618
     5        rdar://82445743
     6
     7        Reviewed by Yusuke Suzuki.
     8
     9        We were using an unnecessary indirect call to call Probe::executeProbe() when we
     10        can be using a direct call, which emits less JIT code.  This patch changes the
     11        ARM64 and X86_64 ports to use a direct call now.
     12
     13        Also rename executeProbe to executeJSCJITProbe to make it more unique since we're
     14        switching to extern "C" linkage for this function now.
     15
     16        For MacroAssemblerX86Common.cpp, we left the X86 and MSVC implementations unchanged.
     17        For X86, I don't know the stack alignment requirements (if any) plus we might want
     18        to delete this code eventually since we're not supporting the X86 JIT anymore.
     19        For MSVC, I don't know the way to express a direct call in MSVC assembly, and have
     20        no way to test it.  Will leave that as an exercise for folks working on the Windows
     21        ports if they are interested.
     22
     23        Also remove JITProbeExecutorPtrTag since it's no longer needed.
     24
     25        * assembler/MacroAssemblerARM64.cpp:
     26        (JSC::MacroAssembler::probe):
     27        * assembler/MacroAssemblerARMv7.cpp:
     28        (JSC::MacroAssembler::probe):
     29        * assembler/MacroAssemblerMIPS.cpp:
     30        (JSC::MacroAssembler::probe):
     31        * assembler/MacroAssemblerX86Common.cpp:
     32        (JSC::ctiMasmProbeTrampoline):
     33        (JSC::MacroAssembler::probe):
     34        * assembler/ProbeContext.cpp:
     35        (JSC::Probe::executeJSCJITProbe):
     36        (JSC::Probe::executeProbe): Deleted.
     37        * assembler/ProbeContext.h:
     38        * runtime/JSCPtrTag.h:
     39
    1402021-08-27  Saam Barati  <sbarati@apple.com>
    241
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.cpp

    r278302 r281718  
    256256    UCPURegister x25;
    257257    UCPURegister x26;
    258     UCPURegister x27;
    259     UCPURegister x28;
    260258    UCPURegister x30; // lr
     259    UCPURegister x27; // Saved in trampoline to use as scratch.
     260    UCPURegister unusedForAlignment;
    261261};
    262262
     
    264264#define IN_X25_OFFSET (1 * GPREG_SIZE)
    265265#define IN_X26_OFFSET (2 * GPREG_SIZE)
    266 #define IN_X27_OFFSET (3 * GPREG_SIZE)
    267 #define IN_X28_OFFSET (4 * GPREG_SIZE)
    268 #define IN_X30_OFFSET (5 * GPREG_SIZE)
     266#define IN_X30_OFFSET (3 * GPREG_SIZE)
     267#define IN_X27_OFFSET (4 * GPREG_SIZE)
     268// The 5th slot is unused. It's only there for alignment.
    269269#define IN_SIZE       (6 * GPREG_SIZE)
    270270
     
    272272static_assert(IN_X25_OFFSET == offsetof(IncomingProbeRecord, x25), "IN_X25_OFFSET is incorrect");
    273273static_assert(IN_X26_OFFSET == offsetof(IncomingProbeRecord, x26), "IN_X26_OFFSET is incorrect");
     274static_assert(IN_X30_OFFSET == offsetof(IncomingProbeRecord, x30), "IN_X23_OFFSET is incorrect");
    274275static_assert(IN_X27_OFFSET == offsetof(IncomingProbeRecord, x27), "IN_X27_OFFSET is incorrect");
    275 static_assert(IN_X28_OFFSET == offsetof(IncomingProbeRecord, x28), "IN_X22_OFFSET is incorrect");
    276 static_assert(IN_X30_OFFSET == offsetof(IncomingProbeRecord, x30), "IN_X23_OFFSET is incorrect");
    277276static_assert(IN_SIZE == sizeof(IncomingProbeRecord), "IN_SIZE is incorrect");
    278277static_assert(!(sizeof(IncomingProbeRecord) & 0xf), "IncomingProbeStack must be 16-byte aligned");
     
    318317#if CPU(ARM64E)
    319318#define JIT_PROBE_PC_PTR_TAG 0xeeac
    320 #define JIT_PROBE_EXECUTOR_PTR_TAG 0x28de
    321319#define JIT_PROBE_STACK_INITIALIZATION_FUNCTION_PTR_TAG 0x315c
    322320static_assert(JIT_PROBE_PC_PTR_TAG == JITProbePCPtrTag);
    323 static_assert(JIT_PROBE_EXECUTOR_PTR_TAG == JITProbeExecutorPtrTag);
    324321static_assert(JIT_PROBE_STACK_INITIALIZATION_FUNCTION_PTR_TAG == JITProbeStackInitializationFunctionPtrTag);
    325322#endif
     
    341338    //     x25: probe arg
    342339    //     x26: scratch, was ctiMasmProbeTrampoline
    343     //     x27: scratch
    344     //     x28: Probe::executeProbe
    345340    //     x30: return address
    346341
     342    "str       x27, [sp, #" STRINGIZE_VALUE_OF(IN_X27_OFFSET) "]" "\n"
    347343    "mov       x26, sp" "\n"
    348     "mov       x27, sp" "\n"
    349 
    350     "sub       x27, x27, #" STRINGIZE_VALUE_OF(PROBE_SIZE_PLUS_EXTRAS + OUT_SIZE) "\n"
    351     "bic       x27, x27, #0xf" "\n" // The ARM EABI specifies that the stack needs to be 16 byte aligned.
    352     "mov       sp, x27" "\n" // Set the sp to protect the Probe::State from interrupts before we initialize it.
     344    "sub       x27, sp, #" STRINGIZE_VALUE_OF(PROBE_SIZE_PLUS_EXTRAS + OUT_SIZE) "\n"
     345    "bic       sp, x27, #0xf" "\n" // The ARM EABI specifies that the stack needs to be 16 byte aligned.
    353346
    354347    "stp       x24, x25, [sp, #" STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "]" "\n" // Store the probe handler function and arg (preloaded into x24 and x25
     
    363356
    364357    "ldp       x2, x3, [x26, #" STRINGIZE_VALUE_OF(IN_X24_OFFSET) "]" "\n" // Preload saved x24 and x25.
    365     "ldp       x4, x5, [x26, #" STRINGIZE_VALUE_OF(IN_X26_OFFSET) "]" "\n" // Preload saved x26 and x27.
    366     "ldp       x6, x7, [x26, #" STRINGIZE_VALUE_OF(IN_X28_OFFSET) "]" "\n" // Preload saved x28 and lr.
     358    "ldp       x4, x5, [x26, #" STRINGIZE_VALUE_OF(IN_X26_OFFSET) "]" "\n" // Preload saved x26 and lr.
     359    "ldr       x27, [x26, #" STRINGIZE_VALUE_OF(IN_X27_OFFSET) "]" "\n"
     360
    367361    "add       x26, x26, #" STRINGIZE_VALUE_OF(IN_SIZE) "\n" // Compute the sp before the probe.
    368362
     
    374368    "stp       x20, x21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X20_OFFSET) "]" "\n"
    375369    "stp       x22, x23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X22_OFFSET) "]" "\n"
    376     "stp       x2, x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X24_OFFSET) "]" "\n" // Store saved r24 and r25 (preloaded into x2 and x3 above).
    377     "stp       x4, x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X26_OFFSET) "]" "\n" // Store saved r26 and r27 (preloaded into x4 and x5 above).
    378     "stp       x6, x29, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
    379     "stp       x7, x26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n" // Save values lr and sp (original sp value computed into x26 above).
     370    "stp       x2,  x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X24_OFFSET) "]" "\n" // Store saved r24 and r25 (preloaded into x2 and x3 above).
     371    "stp       x4,  x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X26_OFFSET) "]" "\n" // Store saved r26 (preloaded into x4) and r27.
     372    "stp       x28, x29, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
     373    "stp       x5, x26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n" // Save values lr and sp (original sp value computed into x26 above).
    380374
    381375    "add       x30, x30, #" STRINGIZE_VALUE_OF(2 * GPREG_SIZE) "\n" // The PC after the probe is at 2 instructions past the return point.
     
    412406    // the caller of the probe (which is what we want in order to play nice with debuggers e.g. lldb).
    413407    "mov       x0, sp" "\n" // Set the Probe::State* arg.
    414 #if CPU(ARM64E)
    415     "movz      lr, #" STRINGIZE_VALUE_OF(JIT_PROBE_EXECUTOR_PTR_TAG) "\n"
    416     "blrab     x28, lr" "\n" // Call the probe handler.
    417 #else
    418     "blr       x28" "\n" // Call the probe handler.
    419 #endif
     408    "bl      " SYMBOL_STRING(executeJSCJITProbe) "\n"
    420409
    421410    // Make sure the Probe::State is entirely below the result stack pointer so
     
    512501    // either modify lr or pc, but not both in the same probe invocation. The probe
    513502    // mechanism ensures that we never try to modify both lr and pc with a RELEASE_ASSERT
    514     // in Probe::executeProbe().
     503    // in Probe::().
    515504
    516505    // Determine if the probe handler changed the pc.
     
    562551    "orr       x27, x27, x28" "\n"
    563552    "ldrb      w27, [x27]" "\n"
    564     "add       x27, x30, #48" "\n" // Compute sp at return point.
    565     "pacib     x28, x27" "\n"
    566553#endif
    567554    "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n"
     
    588575
    589576    storePair64(x24, x25, sp, TrustedImm32(offsetof(IncomingProbeRecord, x24)));
    590     storePair64(x26, x27, sp, TrustedImm32(offsetof(IncomingProbeRecord, x26)));
    591     storePair64(x28, x30, sp, TrustedImm32(offsetof(IncomingProbeRecord, x28))); // Note: x30 is lr.
     577    storePair64(x26, x30, sp, TrustedImm32(offsetof(IncomingProbeRecord, x26))); // Note: x30 is lr.
    592578    move(TrustedImmPtr(tagCFunction<OperationPtrTag>(ctiMasmProbeTrampoline)), x26);
    593     move(TrustedImmPtr(tagCFunction<JITProbeExecutorPtrTag>(Probe::executeProbe)), x28);
    594579#if CPU(ARM64E)
    595580    assertIsTaggedWith<JITProbePtrTag>(function);
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARMv7.cpp

    r277936 r281718  
    229229    //     r0: probe function
    230230    //     r1: probe arg
    231     //     r2: Probe::executeProbe
     231    //     r2: Probe::executeJSCJITProbe
    232232    //     ip: scratch, was ctiMasmProbeTrampoline
    233233    //     lr: return address
    234234
    235235    "mov       ip, sp" "\n"
    236     "str       r2, [ip, #-" STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n" // Stash Probe::executeProbe.
     236    "str       r2, [ip, #-" STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n" // Stash Probe::executeJSCJITProbe.
    237237
    238238    "mov       r2, sp" "\n"
     
    242242    "bic       r2, r2, #0xf" "\n"
    243243    "mov       sp, r2" "\n" // Set the sp to protect the Probe::State from interrupts before we initialize it.
    244     "ldr       r2, [ip, #-" STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n" // Reload Probe::executeProbe.
     244    "ldr       r2, [ip, #-" STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n" // Reload Probe::executeJSCJITProbe.
    245245
    246246    "str       r0, [sp, #" STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "]" "\n"
     
    281281
    282282    "mov       r0, sp" "\n" // the Probe::State* arg.
    283     "blx       r2" "\n" // Call Probe::executeProbe.
     283    "blx       r2" "\n" // Call Probe::executeJSCJITProbe.
    284284
    285285    // Make sure the Probe::State is entirely below the result stack pointer so
     
    381381    move(TrustedImmPtr(reinterpret_cast<void*>(function)), r0);
    382382    move(TrustedImmPtr(arg), r1);
    383     move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeProbe)), r2);
     383    move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeJSCJITProbe)), r2);
    384384    move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), ip);
    385385    m_assembler.blx(ip);
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerMIPS.cpp

    r277936 r281718  
    11/*
    2  * Copyright (C) 2013-2017 Apple Inc. All rights reserved.
     2 * Copyright (C) 2013-2021 Apple Inc. All rights reserved.
    33 *
    44 * Redistribution and use in source and binary forms, with or without
     
    305305    //     a0: probe function
    306306    //     a1: probe arg
    307     //     a2: Probe::executeProbe
     307    //     a2: Probe::executeJSCJITProbe
    308308    //     s0: scratch, was ctiMasmProbeTrampoline
    309309    //     s1: scratch
     
    395395    "move      $a0, $sp" "\n" // Set the Probe::State* arg.
    396396    "addiu     $sp, $sp, -16" "\n" // Allocate stack space for (unused) 16 bytes (8-byte aligned) for 4 arguments.
    397     "move      $t9, $a2" "\n" // Probe::executeProbe()
     397    "move      $t9, $a2" "\n" // Probe::executeJSCJITProbe()
    398398    "jalr      $t9" "\n" // Call the probe handler.
    399399    "nop" "\n"
     
    509509    // either modify ra or pc, but not both in the same probe invocation. The probe
    510510    // mechanism ensures that we never try to modify both ra and pc with a RELEASE_ASSERT
    511     // in Probe::executeProbe().
     511    // in Probe::executeJSCJITProbe().
    512512
    513513    // Determine if the probe handler changed the pc.
     
    564564    move(TrustedImmPtr(reinterpret_cast<void*>(function)), a0);
    565565    move(TrustedImmPtr(arg), a1);
    566     move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeProbe)), a2);
     566    move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeJSCJITProbe)), a2);
    567567    move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), s0);
    568568    m_assembler.jalr(s0);
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerX86Common.cpp

    r277936 r281718  
    106106#endif // CPU(X86_64)
    107107
    108 #define PROBE_EXECUTOR_OFFSET PROBE_SIZE // Stash the executeProbe function pointer at the end of the ProbeContext.
     108#if COMPILER(MSVC) || CPU(X86)
     109#define PROBE_EXECUTOR_OFFSET PROBE_SIZE // Stash the executeJSCJITProbe function pointer at the end of the ProbeContext.
     110#endif
    109111
    110112// The outgoing record to be popped off the stack at the end consists of:
     
    166168
    167169static_assert(sizeof(Probe::State) == PROBE_SIZE, "Probe::State::size's matches ctiMasmProbeTrampoline");
     170#if COMPILER(MSVC) || CPU(X86)
    168171static_assert((PROBE_EXECUTOR_OFFSET + PTR_SIZE) <= (PROBE_SIZE + OUT_SIZE), "Must have room after ProbeContext to stash the probe handler");
     172#endif
    169173
    170174#undef PROBE_OFFSETOF
     
    190194    //
    191195    // Incoming registers contain:
    192     //     ecx: Probe::executeProbe
     196    //     ecx: Probe::executeJSCJITProbe
    193197    //     edx: probe function
    194198    //     ebx: probe arg
     
    357361        //
    358362        // Incoming registers contain:
    359         //     ecx: Probe::executeProbe
     363        //     ecx: Probe::executeJSCJITProbe
    360364        //     edx: probe function
    361365        //     ebx: probe arg
     
    529533    //     rbp[2 * ptrSize]: saved rbx
    530534    //     rbp[3 * ptrSize]: saved rdx
    531     //     rbp[4 * ptrSize]: saved rcx
    532     //     rbp[5 * ptrSize]: saved rax
     535    //     rbp[4 * ptrSize]: saved rax
    533536    //
    534537    // Incoming registers contain:
    535     //     rcx: Probe::executeProbe
    536538    //     rdx: probe function
    537539    //     rbx: probe arg
     
    544546    // Since sp points to the Probe::State, we've ensured that it's protected from interrupts before we initialize it.
    545547
    546     "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rsp)" "\n"
    547548    "movq %rdx, " STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "(%rsp)" "\n"
    548549    "movq %rbx, " STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "(%rsp)" "\n"
    549550    "movq %rsi, " STRINGIZE_VALUE_OF(PROBE_CPU_ESI_OFFSET) "(%rsp)" "\n"
    550551    "movq %rdi, " STRINGIZE_VALUE_OF(PROBE_CPU_EDI_OFFSET) "(%rsp)" "\n"
     552
     553    "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rsp)" "\n"
    551554
    552555    "movq -1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rbp), %rcx" "\n"
     
    561564    "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EDX_OFFSET) "(%rsp)" "\n"
    562565    "movq 4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rbp), %rcx" "\n"
    563     "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_ECX_OFFSET) "(%rsp)" "\n"
    564     "movq 5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "(%rbp), %rcx" "\n"
    565566    "movq %rcx, " STRINGIZE_VALUE_OF(PROBE_CPU_EAX_OFFSET) "(%rsp)" "\n"
    566567
     
    596597
    597598    "movq %rsp, %rdi" "\n" // the Probe::State* arg.
    598     "call *" STRINGIZE_VALUE_OF(PROBE_EXECUTOR_OFFSET) "(%rsp)" "\n"
     599    "call " SYMBOL_STRING(executeJSCJITProbe) "\n"
    599600
    600601    // Make sure the Probe::State is entirely below the result stack pointer so
     
    752753void MacroAssembler::probe(Probe::Function function, void* arg)
    753754{
     755#if CPU(X86_64) && COMPILER(GCC_COMPATIBLE)
     756    // Extra push so that the total number of pushes pad out to 32-bytes, and the
     757    // stack pointer remains 32 byte aligned as required by the ABI.
     758    push(RegisterID::eax);
     759#endif
    754760    push(RegisterID::eax);
    755761    move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), RegisterID::eax);
     762#if COMPILER(MSVC) || CPU(X86)
    756763    push(RegisterID::ecx);
    757     move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeProbe)), RegisterID::ecx);
     764    move(TrustedImmPtr(reinterpret_cast<void*>(Probe::executeJSCJITProbe)), RegisterID::ecx);
     765#endif
    758766    push(RegisterID::edx);
    759767    move(TrustedImmPtr(reinterpret_cast<void*>(function)), RegisterID::edx);
  • trunk/Source/JavaScriptCore/assembler/ProbeContext.cpp

    r277936 r281718  
    3434static void flushDirtyStackPages(State*);
    3535
    36 void executeProbe(State* state)
     36void executeJSCJITProbe(State* state)
    3737{
    3838    Context context(state);
  • trunk/Source/JavaScriptCore/assembler/ProbeContext.h

    r277936 r281718  
    244244};
    245245
    246 void executeProbe(State*);
     246extern "C" void executeJSCJITProbe(State*);
    247247
    248248} // namespace Probe
  • trunk/Source/JavaScriptCore/runtime/JSCPtrTag.h

    r281544 r281718  
    5757    v(HostFunctionPtrTag, PtrTagCalleeType::Native, PtrTagCallerType::Native) \
    5858    v(JITProbePtrTag, PtrTagCalleeType::Native, PtrTagCallerType::Native) \
    59     v(JITProbeExecutorPtrTag, PtrTagCalleeType::Native, PtrTagCallerType::Native) \
    6059    v(JITProbePCPtrTag, PtrTagCalleeType::Native, PtrTagCallerType::Native) \
    6160    v(JITProbeStackInitializationFunctionPtrTag, PtrTagCalleeType::Native, PtrTagCallerType::Native) \
Note: See TracChangeset for help on using the changeset viewer.