Changeset 219951 in webkit


Ignore:
Timestamp:
Jul 26, 2017 11:31:02 AM (7 years ago)
Author:
mark.lam@apple.com
Message:

Add the ability to change sp and pc to the ARM64 JIT probe.
https://bugs.webkit.org/show_bug.cgi?id=174697
<rdar://problem/33436965>

Reviewed by JF Bastien.

This patch implements the following:

  1. The ARM64 probe now supports modifying the pc and sp.

However, lr is not preserved when modifying the pc because it is used as the
scratch register for the indirect jump. Hence, the probe handler function
may not modify both lr and pc in the same probe invocation.

  2. Fix probe tests to use bitwise comparison when comparing double register values. Otherwise, equivalent NaN values will be interpreted as not equivalent.
  3. Change the minimum offset increment in testProbeModifiesStackPointer to be 16 bytes for ARM64. This is because the ARM64 probe now uses the ldp and stp instructions which require 16 byte alignment for their memory access.
  • assembler/MacroAssemblerARM64.cpp:

(JSC::arm64ProbeError):
(JSC::MacroAssembler::probe):
(JSC::arm64ProbeTrampoline): Deleted.

  • assembler/testmasm.cpp:

(JSC::isSpecialGPR):
(JSC::testProbeReadsArgumentRegisters):
(JSC::testProbeWritesArgumentRegisters):
(JSC::testProbePreservesGPRS):
(JSC::testProbeModifiesStackPointer):
(JSC::testProbeModifiesStackPointerToInsideProbeContextOnStack):
(JSC::testProbeModifiesStackPointerToNBytesBelowSP):

Location:
trunk/Source/JavaScriptCore
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r219899 r219951  
     12017-07-26  Mark Lam  <mark.lam@apple.com>
     2
     3        Add the ability to change sp and pc to the ARM64 JIT probe.
     4        https://bugs.webkit.org/show_bug.cgi?id=174697
     5        <rdar://problem/33436965>
     6
     7        Reviewed by JF Bastien.
     8
     9        This patch implements the following:
     10
     11        1. The ARM64 probe now supports modifying the pc and sp.
     12
     13           However, lr is not preserved when modifying the pc because it is used as the
     14           scratch register for the indirect jump. Hence, the probe handler function
     15           may not modify both lr and pc in the same probe invocation.
     16
     17        2. Fix probe tests to use bitwise comparison when comparing double register
     18           values. Otherwise, equivalent nan values will be interpreted as not equivalent.
     19
     20        3. Change the minimum offset increment in testProbeModifiesStackPointer to be
     21           16 bytes for ARM64.  This is because the ARM64 probe now uses the ldp and stp
     22           instructions which require 16 byte alignment for their memory access.
     23
     24        * assembler/MacroAssemblerARM64.cpp:
     25        (JSC::arm64ProbeError):
     26        (JSC::MacroAssembler::probe):
     27        (JSC::arm64ProbeTrampoline): Deleted.
     28        * assembler/testmasm.cpp:
     29        (JSC::isSpecialGPR):
     30        (JSC::testProbeReadsArgumentRegisters):
     31        (JSC::testProbeWritesArgumentRegisters):
     32        (JSC::testProbePreservesGPRS):
     33        (JSC::testProbeModifiesStackPointer):
     34        (JSC::testProbeModifiesStackPointerToInsideProbeContextOnStack):
     35        (JSC::testProbeModifiesStackPointerToNBytesBelowSP):
     36
    1372017-07-25  JF Bastien  <jfbastien@apple.com>
    238
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM64.cpp

    r219790 r219951  
    123123#define PROBE_CPU_Q31_OFFSET (PROBE_FIRST_FPREG_OFFSET + (31 * FPREG_SIZE))
    124124#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (32 * FPREG_SIZE))
    125 #define SAVED_CALLER_SP PROBE_SIZE
    126 #define PROBE_SIZE_PLUS_SAVED_CALLER_SP (SAVED_CALLER_SP + PTR_SIZE)
    127 #define PROBE_ALIGNED_SIZE (PROBE_SIZE_PLUS_SAVED_CALLER_SP)
     125
     126#define SAVED_PROBE_RETURN_PC_OFFSET        (PROBE_SIZE + (0 * PTR_SIZE))
     127#define SAVED_PROBE_LR_OFFSET               (PROBE_SIZE + (1 * PTR_SIZE))
     128#define SAVED_PROBE_ERROR_FUNCTION_OFFSET   (PROBE_SIZE + (2 * PTR_SIZE))
     129#define PROBE_SIZE_PLUS_EXTRAS              (PROBE_SIZE + (3 * PTR_SIZE))
    128130
    129131// These ASSERTs remind you that if you change the layout of ProbeContext,
     
    210212
    211213COMPILE_ASSERT(sizeof(ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
    212 COMPILE_ASSERT(!(PROBE_ALIGNED_SIZE & 0xf), ProbeContext_aligned_size_offset_should_be_16_byte_aligned);
     214
     215// Conditions for using ldp and stp.
     216static_assert(PROBE_CPU_PC_OFFSET == PROBE_CPU_SP_OFFSET + PTR_SIZE, "PROBE_CPU_SP_OFFSET and PROBE_CPU_PC_OFFSET must be adjacent");
    213217
    214218#undef PROBE_OFFSETOF
     219
     220#define FPR_OFFSET(fpr) (PROBE_CPU_##fpr##_OFFSET - PROBE_CPU_Q0_OFFSET)
     221
     222struct IncomingProbeRecord {
     223    uintptr_t probeHandlerFunction;
     224    uintptr_t probeArg;
     225    uintptr_t x26;
     226    uintptr_t x27;
     227    uintptr_t lr;
     228    uintptr_t sp;
     229    uintptr_t probeErrorFunction;
     230    uintptr_t unused; // Padding for alignment.
     231};
     232
     233#define IN_HANDLER_FUNCTION_OFFSET (0 * PTR_SIZE)
     234#define IN_ARG_OFFSET              (1 * PTR_SIZE)
     235#define IN_X26_OFFSET              (2 * PTR_SIZE)
     236#define IN_X27_OFFSET              (3 * PTR_SIZE)
     237#define IN_LR_OFFSET               (4 * PTR_SIZE)
     238#define IN_SP_OFFSET               (5 * PTR_SIZE)
     239#define IN_ERROR_FUNCTION_OFFSET   (6 * PTR_SIZE)
     240
     241static_assert(IN_HANDLER_FUNCTION_OFFSET == offsetof(IncomingProbeRecord, probeHandlerFunction), "IN_HANDLER_FUNCTION_OFFSET is incorrect");
     242static_assert(IN_ARG_OFFSET == offsetof(IncomingProbeRecord, probeArg), "IN_ARG_OFFSET is incorrect");
     243static_assert(IN_X26_OFFSET == offsetof(IncomingProbeRecord, x26), "IN_X26_OFFSET is incorrect");
     244static_assert(IN_X27_OFFSET == offsetof(IncomingProbeRecord, x27), "IN_X27_OFFSET is incorrect");
     245static_assert(IN_LR_OFFSET == offsetof(IncomingProbeRecord, lr), "IN_LR_OFFSET is incorrect");
     246static_assert(IN_SP_OFFSET == offsetof(IncomingProbeRecord, sp), "IN_SP_OFFSET is incorrect");
     247static_assert(IN_ERROR_FUNCTION_OFFSET == offsetof(IncomingProbeRecord, probeErrorFunction), "IN_ERROR_FUNCTION_OFFSET is incorrect");
     248static_assert(!(sizeof(IncomingProbeRecord) & 0xf), "IncomingProbeStack must be 16-byte aligned");
     249
     250struct OutgoingProbeRecord {
     251    uintptr_t nzcv;
     252    uintptr_t fpsr;
     253    uintptr_t x27;
     254    uintptr_t x28;
     255    uintptr_t fp;
     256    uintptr_t lr;
     257};
     258
     259#define OUT_NZCV_OFFSET (0 * PTR_SIZE)
     260#define OUT_FPSR_OFFSET (1 * PTR_SIZE)
     261#define OUT_X27_OFFSET  (2 * PTR_SIZE)
     262#define OUT_X28_OFFSET  (3 * PTR_SIZE)
     263#define OUT_FP_OFFSET   (4 * PTR_SIZE)
     264#define OUT_LR_OFFSET   (5 * PTR_SIZE)
     265#define OUT_SIZE        (6 * PTR_SIZE)
     266
     267static_assert(OUT_NZCV_OFFSET == offsetof(OutgoingProbeRecord, nzcv), "OUT_NZCV_OFFSET is incorrect");
     268static_assert(OUT_FPSR_OFFSET == offsetof(OutgoingProbeRecord, fpsr), "OUT_FPSR_OFFSET is incorrect");
     269static_assert(OUT_X27_OFFSET == offsetof(OutgoingProbeRecord, x27), "OUT_X27_OFFSET is incorrect");
     270static_assert(OUT_X28_OFFSET == offsetof(OutgoingProbeRecord, x28), "OUT_X28_OFFSET is incorrect");
     271static_assert(OUT_FP_OFFSET == offsetof(OutgoingProbeRecord, fp), "OUT_FP_OFFSET is incorrect");
     272static_assert(OUT_LR_OFFSET == offsetof(OutgoingProbeRecord, lr), "OUT_LR_OFFSET is incorrect");
     273static_assert(OUT_SIZE == sizeof(OutgoingProbeRecord), "OUT_SIZE is incorrect");
     274static_assert(!(sizeof(OutgoingProbeRecord) & 0xf), "OutgoingProbeRecord must be 16-byte aligned");
     275
     276#define STATE_PC_NOT_CHANGED 0
     277#define STATE_PC_CHANGED 1
     278static_assert(STATE_PC_NOT_CHANGED != STATE_PC_CHANGED, "STATE_PC_NOT_CHANGED and STATE_PC_CHANGED should not be equal");
    215279
    216280asm (
    217281    ".text" "\n"
    218     ".align 2" "\n"
     282    ".balign 16" "\n"
    219283    ".globl " SYMBOL_STRING(ctiMasmProbeTrampoline) "\n"
    220284    HIDE_SYMBOL(ctiMasmProbeTrampoline) "\n"
    221285    SYMBOL_STRING(ctiMasmProbeTrampoline) ":" "\n"
    222286
    223     // MacroAssemblerARM64::probe() has already generated code to store some values.
    224     // The top of stack (the caller save buffer) now looks like this:
    225     //     sp[0 * ptrSize]: probeFunction
    226     //     sp[1 * ptrSize]: arg1
    227     //     sp[2 * ptrSize]: address of arm64ProbeTrampoline()
    228     //     sp[3 * ptrSize]: saved x27
    229     //     sp[4 * ptrSize]: saved x28
    230     //     sp[5 * ptrSize]: saved lr
    231     //     sp[6 * ptrSize]: saved sp
    232 
     287    // MacroAssemblerARM64::probe() has already generated code to store some values in an
     288    // IncomingProbeRecord. sp points to the IncomingProbeRecord.
     289
     290    "mov       x26, sp" "\n"
    233291    "mov       x27, sp" "\n"
    234     "mov       x28, sp" "\n"
    235 
    236     "sub       x28, x28, #" STRINGIZE_VALUE_OF(PROBE_ALIGNED_SIZE) "\n"
    237 
    238     // The ARM EABI specifies that the stack needs to be 16 byte aligned.
    239     "bic       x28, x28, #0xf" "\n"
    240     "mov       sp, x28" "\n"
    241 
    242     "str       x27, [sp, #" STRINGIZE_VALUE_OF(SAVED_CALLER_SP) "]" "\n"
    243 
    244     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n"
    245     "str       x1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X1_OFFSET) "]" "\n"
    246     "str       x2, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X2_OFFSET) "]" "\n"
    247     "str       x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X3_OFFSET) "]" "\n"
    248     "str       x4, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X4_OFFSET) "]" "\n"
    249     "str       x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X5_OFFSET) "]" "\n"
    250     "str       x6, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X6_OFFSET) "]" "\n"
    251     "str       x7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X7_OFFSET) "]" "\n"
    252     "str       x8, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X8_OFFSET) "]" "\n"
    253     "str       x9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X9_OFFSET) "]" "\n"
    254     "str       x10, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X10_OFFSET) "]" "\n"
    255     "str       x11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X11_OFFSET) "]" "\n"
    256     "str       x12, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X12_OFFSET) "]" "\n"
    257     "str       x13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X13_OFFSET) "]" "\n"
    258     "str       x14, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X14_OFFSET) "]" "\n"
    259     "str       x15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X15_OFFSET) "]" "\n"
    260     "str       x16, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X16_OFFSET) "]" "\n"
    261     "str       x17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X17_OFFSET) "]" "\n"
    262     "str       x18, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X18_OFFSET) "]" "\n"
    263     "str       x19, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X19_OFFSET) "]" "\n"
    264     "str       x20, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X20_OFFSET) "]" "\n"
    265     "str       x21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X21_OFFSET) "]" "\n"
    266     "str       x22, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X22_OFFSET) "]" "\n"
    267     "str       x23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X23_OFFSET) "]" "\n"
    268     "str       x24, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X24_OFFSET) "]" "\n"
    269     "str       x25, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X25_OFFSET) "]" "\n"
    270     "str       x26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X26_OFFSET) "]" "\n"
    271 
    272     "ldr       x0, [x27, #3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    273     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n"
    274     "ldr       x0, [x27, #4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    275     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
    276 
    277     "str       fp, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n"
    278 
    279     "ldr       x0, [x27, #5 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    280     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n"
    281     "ldr       x0, [x27, #6 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    282     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
    283 
     292
     293    "sub       x27, x27, #" STRINGIZE_VALUE_OF(PROBE_SIZE_PLUS_EXTRAS) "\n"
     294    "bic       x27, x27, #0xf" "\n" // The ARM EABI specifies that the stack needs to be 16 byte aligned.
     295    "mov       sp, x27" "\n" // Make sure interrupts don't over-write our data on the stack.
     296
     297    "stp       x0, x1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n"
     298    "mrs       x0, nzcv" "\n" // Preload nzcv.
     299    "stp       x2, x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X2_OFFSET) "]" "\n"
     300    "stp       x4, x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X4_OFFSET) "]" "\n"
     301    "mrs       x1, fpsr" "\n" // Preload fpsr.
     302    "stp       x6, x7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X6_OFFSET) "]" "\n"
     303    "stp       x8, x9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X8_OFFSET) "]" "\n"
     304
     305    "ldp       x2, x3, [x26, #" STRINGIZE_VALUE_OF(IN_HANDLER_FUNCTION_OFFSET) "]" "\n" // Preload probe handler function and probe arg.
     306    "ldp       x4, x5, [x26, #" STRINGIZE_VALUE_OF(IN_X26_OFFSET) "]" "\n" // Preload saved r26 and r27.
     307    "ldp       x6, x7, [x26, #" STRINGIZE_VALUE_OF(IN_LR_OFFSET) "]" "\n" // Preload saved lr and sp.
     308    "ldr       x8, [x26, #" STRINGIZE_VALUE_OF(IN_ERROR_FUNCTION_OFFSET) "]" "\n" // Preload probe error function.
     309
     310    "stp       x10, x11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X10_OFFSET) "]" "\n"
     311    "stp       x12, x13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X12_OFFSET) "]" "\n"
     312    "stp       x14, x15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X14_OFFSET) "]" "\n"
     313    "stp       x16, x17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X16_OFFSET) "]" "\n"
     314    "stp       x18, x19, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X18_OFFSET) "]" "\n"
     315    "stp       x20, x21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X20_OFFSET) "]" "\n"
     316    "stp       x22, x23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X22_OFFSET) "]" "\n"
     317    "stp       x24, x25, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X24_OFFSET) "]" "\n"
     318    "stp       x4, x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X26_OFFSET) "]" "\n" // Store saved r26 and r27 (preloaded into x4 and x5 above).
     319    "stp       x28, fp, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
     320    "stp       x6, x7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n" // Save values lr and sp (preloaded into x6 and x7 above).
     321
     322    "str       x6, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_LR_OFFSET) "]" "\n" // Save a duplicate copy of lr (in x6).
     323    "str       lr, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_RETURN_PC_OFFSET) "]" "\n" // Save a duplicate copy of return pc (in lr).
     324
     325    "add       lr, lr, #" STRINGIZE_VALUE_OF(2 * PTR_SIZE) "\n" // The PC after the probe is at 2 instructions past the return point.
    284326    "str       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_PC_OFFSET) "]" "\n"
    285327
    286     "mrs       x0, nzcv" "\n"
    287     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n"
    288     "mrs       x0, fpsr" "\n"
    289     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FPSR_OFFSET) "]" "\n"
    290 
    291     "ldr       x0, [x27, #0 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    292     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "]" "\n"
    293     "ldr       x0, [x27, #1 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    294     "str       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_ARG_OFFSET) "]" "\n"
    295 
    296     "str       d0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q0_OFFSET) "]" "\n"
    297     "str       d1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q1_OFFSET) "]" "\n"
    298     "str       d2, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q2_OFFSET) "]" "\n"
    299     "str       d3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q3_OFFSET) "]" "\n"
    300     "str       d4, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q4_OFFSET) "]" "\n"
    301     "str       d5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q5_OFFSET) "]" "\n"
    302     "str       d6, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q6_OFFSET) "]" "\n"
    303     "str       d7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q7_OFFSET) "]" "\n"
    304     "str       d8, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q8_OFFSET) "]" "\n"
    305     "str       d9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q9_OFFSET) "]" "\n"
    306     "str       d10, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q10_OFFSET) "]" "\n"
    307     "str       d11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q11_OFFSET) "]" "\n"
    308     "str       d12, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q12_OFFSET) "]" "\n"
    309     "str       d13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q13_OFFSET) "]" "\n"
    310     "str       d14, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q14_OFFSET) "]" "\n"
    311     "str       d15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q15_OFFSET) "]" "\n"
    312     "str       d16, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q16_OFFSET) "]" "\n"
    313     "str       d17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q17_OFFSET) "]" "\n"
    314     "str       d18, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q18_OFFSET) "]" "\n"
    315     "str       d19, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q19_OFFSET) "]" "\n"
    316     "str       d20, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q20_OFFSET) "]" "\n"
    317     "str       d21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q21_OFFSET) "]" "\n"
    318     "str       d22, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q22_OFFSET) "]" "\n"
    319     "str       d23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q23_OFFSET) "]" "\n"
    320     "str       d24, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q24_OFFSET) "]" "\n"
    321     "str       d25, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q25_OFFSET) "]" "\n"
    322     "str       d26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q26_OFFSET) "]" "\n"
    323     "str       d27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q27_OFFSET) "]" "\n"
    324     "str       d28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q28_OFFSET) "]" "\n"
    325     "str       d29, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q29_OFFSET) "]" "\n"
    326     "str       d30, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q30_OFFSET) "]" "\n"
    327     "str       d31, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q31_OFFSET) "]" "\n"
    328 
    329     "mov       x28, sp" "\n" // Save the ProbeContext*.
    330 
    331     "mov       x0, sp" "\n" // the ProbeContext* arg.
    332     "ldr       x27, [x27, #3 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    333     "blr       x27" "\n"
    334 
    335     "mov       sp, x28" "\n"
     328    "stp       x0, x1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n" // Store nzcv and fpsr (preloaded into x0 and x1 above).
     329
     330    "stp       x2, x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_PROBE_FUNCTION_OFFSET) "]" "\n" // Store the probe handler function and arg (preloaded into x2 and x3 above).
     331    "str       x8, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_ERROR_FUNCTION_OFFSET) "]" "\n" // Store the probe error function (preloaded into x8 above).
     332
     333    "add       x9, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q0_OFFSET) "\n"
     334    "stp       d0, d1, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q0)) "]" "\n"
     335    "stp       d2, d3, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q2)) "]" "\n"
     336    "stp       d4, d5, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q4)) "]" "\n"
     337    "stp       d6, d7, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q6)) "]" "\n"
     338    "stp       d8, d9, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q8)) "]" "\n"
     339    "stp       d10, d11, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q10)) "]" "\n"
     340    "stp       d12, d13, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q12)) "]" "\n"
     341    "stp       d14, d15, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q14)) "]" "\n"
     342    "stp       d16, d17, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q16)) "]" "\n"
     343    "stp       d18, d19, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q18)) "]" "\n"
     344    "stp       d20, d21, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q20)) "]" "\n"
     345    "stp       d22, d23, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q22)) "]" "\n"
     346    "stp       d24, d25, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q24)) "]" "\n"
     347    "stp       d26, d27, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q26)) "]" "\n"
     348    "stp       d28, d29, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q28)) "]" "\n"
     349    "stp       d30, d31, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q30)) "]" "\n"
     350
     351    "mov       x27, sp" "\n" // Save the ProbeContext* in a callee saved register.
     352
     353    // Note: we haven't changed the value of fp. Hence, it is still pointing to the frame of
     354    // the caller of the probe (which is what we want in order to play nice with debuggers e.g. lldb).
     355    "mov       x0, sp" "\n" // Set the ProbeContext* arg.
     356    "blr       x2" "\n" // Call the probe handler function (loaded into x2 above).
     357
     358    "mov       sp, x27" "\n"
    336359
    337360    // To enable probes to modify register state, we copy all registers
    338     // out of the ProbeContext before returning. That is except for x18, pc and sp.
    339 
     361    // out of the ProbeContext before returning. That is except for x18.
    340362    // x18 is "reserved for the platform. Conforming software should not make use of it."
    341363    // Hence, the JITs would not be using it, and the probe should also not be modifying it.
    342364    // See https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html.
    343365
    344     // We can't modify the pc, because the only way to set its value on ARM64 is via
    345     // an indirect branch or a ret, which means we'll need a free register to do so.
    346     // The probe mechanism is required to not perturb any registers that the caller
    347     // may use. Hence, we don't have this free register available.
    348 
    349     // In order to return to the caller, we need to ret via lr. The probe mechanism will
    350     // restore lr's value after returning to the caller by loading the restore value
    351     // from the caller save buffer. The caller expects to access the caller save buffer via
    352     // sp. Hence, we cannot allow sp to be modified by the probe.
    353 
    354     "ldr       d0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q0_OFFSET) "]" "\n"
    355     "ldr       d1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q1_OFFSET) "]" "\n"
    356     "ldr       d2, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q2_OFFSET) "]" "\n"
    357     "ldr       d3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q3_OFFSET) "]" "\n"
    358     "ldr       d4, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q4_OFFSET) "]" "\n"
    359     "ldr       d5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q5_OFFSET) "]" "\n"
    360     "ldr       d6, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q6_OFFSET) "]" "\n"
    361     "ldr       d7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q7_OFFSET) "]" "\n"
    362     "ldr       d8, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q8_OFFSET) "]" "\n"
    363     "ldr       d9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q9_OFFSET) "]" "\n"
    364     "ldr       d10, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q10_OFFSET) "]" "\n"
    365     "ldr       d11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q11_OFFSET) "]" "\n"
    366     "ldr       d12, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q12_OFFSET) "]" "\n"
    367     "ldr       d13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q13_OFFSET) "]" "\n"
    368     "ldr       d14, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q14_OFFSET) "]" "\n"
    369     "ldr       d15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q15_OFFSET) "]" "\n"
    370     "ldr       d16, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q16_OFFSET) "]" "\n"
    371     "ldr       d17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q17_OFFSET) "]" "\n"
    372     "ldr       d18, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q18_OFFSET) "]" "\n"
    373     "ldr       d19, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q19_OFFSET) "]" "\n"
    374     "ldr       d20, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q20_OFFSET) "]" "\n"
    375     "ldr       d21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q21_OFFSET) "]" "\n"
    376     "ldr       d22, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q22_OFFSET) "]" "\n"
    377     "ldr       d23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q23_OFFSET) "]" "\n"
    378     "ldr       d24, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q24_OFFSET) "]" "\n"
    379     "ldr       d25, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q25_OFFSET) "]" "\n"
    380     "ldr       d26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q26_OFFSET) "]" "\n"
    381     "ldr       d27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q27_OFFSET) "]" "\n"
    382     "ldr       d28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q28_OFFSET) "]" "\n"
    383     "ldr       d29, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q29_OFFSET) "]" "\n"
    384     "ldr       d30, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q30_OFFSET) "]" "\n"
    385     "ldr       d31, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q31_OFFSET) "]" "\n"
    386 
    387     "ldr       x0, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n"
    388     "ldr       x1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X1_OFFSET) "]" "\n"
    389     "ldr       x2, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X2_OFFSET) "]" "\n"
    390     "ldr       x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X3_OFFSET) "]" "\n"
    391     "ldr       x4, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X4_OFFSET) "]" "\n"
    392     "ldr       x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X5_OFFSET) "]" "\n"
    393     "ldr       x6, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X6_OFFSET) "]" "\n"
    394     "ldr       x7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X7_OFFSET) "]" "\n"
    395     "ldr       x8, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X8_OFFSET) "]" "\n"
    396     "ldr       x9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X9_OFFSET) "]" "\n"
    397     "ldr       x10, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X10_OFFSET) "]" "\n"
    398     "ldr       x11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X11_OFFSET) "]" "\n"
    399     "ldr       x12, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X12_OFFSET) "]" "\n"
    400     "ldr       x13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X13_OFFSET) "]" "\n"
    401     "ldr       x14, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X14_OFFSET) "]" "\n"
    402     "ldr       x15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X15_OFFSET) "]" "\n"
    403     "ldr       x16, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X16_OFFSET) "]" "\n"
    404     "ldr       x17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X17_OFFSET) "]" "\n"
     366    "add       x9, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_Q0_OFFSET) "\n"
     367    "ldp       d0, d1, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q0)) "]" "\n"
     368    "ldp       d2, d3, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q2)) "]" "\n"
     369    "ldp       d4, d5, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q4)) "]" "\n"
     370    "ldp       d6, d7, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q6)) "]" "\n"
     371    "ldp       d8, d9, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q8)) "]" "\n"
     372    "ldp       d10, d11, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q10)) "]" "\n"
     373    "ldp       d12, d13, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q12)) "]" "\n"
     374    "ldp       d14, d15, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q14)) "]" "\n"
     375    "ldp       d16, d17, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q16)) "]" "\n"
     376    "ldp       d18, d19, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q18)) "]" "\n"
     377    "ldp       d20, d21, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q20)) "]" "\n"
     378    "ldp       d22, d23, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q22)) "]" "\n"
     379    "ldp       d24, d25, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q24)) "]" "\n"
     380    "ldp       d26, d27, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q26)) "]" "\n"
     381    "ldp       d28, d29, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q28)) "]" "\n"
     382    "ldp       d30, d31, [x9, #" STRINGIZE_VALUE_OF(FPR_OFFSET(Q30)) "]" "\n"
     383
     384    "ldp       x0, x1, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n"
     385    "ldp       x2, x3, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X2_OFFSET) "]" "\n"
     386    "ldp       x4, x5, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X4_OFFSET) "]" "\n"
     387    "ldp       x6, x7, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X6_OFFSET) "]" "\n"
     388    "ldp       x8, x9, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X8_OFFSET) "]" "\n"
     389    "ldp       x10, x11, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X10_OFFSET) "]" "\n"
     390    "ldp       x12, x13, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X12_OFFSET) "]" "\n"
     391    "ldp       x14, x15, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X14_OFFSET) "]" "\n"
     392    "ldp       x16, x17, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X16_OFFSET) "]" "\n"
    405393    // x18 should not be modified by the probe. See comment above for details.
    406     "ldr       x19, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X19_OFFSET) "]" "\n"
    407     "ldr       x20, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X20_OFFSET) "]" "\n"
    408     "ldr       x21, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X21_OFFSET) "]" "\n"
    409     "ldr       x22, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X22_OFFSET) "]" "\n"
    410     "ldr       x23, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X23_OFFSET) "]" "\n"
    411     "ldr       x24, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X24_OFFSET) "]" "\n"
    412     "ldr       x25, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X25_OFFSET) "]" "\n"
    413     "ldr       x26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X26_OFFSET) "]" "\n"
    414 
    415     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FPSR_OFFSET) "]" "\n"
    416     "msr       fpsr, x27" "\n"
    417 
    418     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n"
     394    "ldp       x19, x20, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X19_OFFSET) "]" "\n"
     395    "ldp       x21, x22, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X21_OFFSET) "]" "\n"
     396    "ldp       x23, x24, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X23_OFFSET) "]" "\n"
     397    "ldp       x25, x26, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X25_OFFSET) "]" "\n"
     398
     399    // Remaining registers to restore are: fpsr, nzcv, x27, x28, fp, lr, sp, and pc.
     400
     401    "mov       lr, #" STRINGIZE_VALUE_OF(STATE_PC_NOT_CHANGED) "\n"
     402
     403    // The only way to set the pc on ARM64 (from user space) is via an indirect branch
     404    // or a ret, which means we'll need a free register to do so. For our purposes, lr
     405    // happens to be available in applications of the probe where we may want to
     406    // continue executing at a different location (i.e. change the pc) after the probe
     407    // returns. So, the ARM64 probe implementation will allow the probe handler to
     408    // either modify lr or pc, but not both in the same probe invocation. The probe
     409    // mechanism ensures that we never try to modify both lr and pc, else it will
     410    // fail with a RELEASE_ASSERT_NOT_REACHED in arm64ProbeError().
     411
     412    // Determine if the probe handler changed the pc.
     413    "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_RETURN_PC_OFFSET) "]" "\n"
     414    "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_PC_OFFSET) "]" "\n"
     415    "add       x27, x27, #" STRINGIZE_VALUE_OF(2 * PTR_SIZE) "\n"
     416    "cmp       x27, x28" "\n"
     417    "beq     " LOCAL_LABEL_STRING(ctiMasmProbeTrampolinePrepareOutgoingRecords) "\n"
     418
     419    // pc was changed. Determine if the probe handler also changed lr.
     420    "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_LR_OFFSET) "]" "\n"
     421    "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n"
     422    "cmp       x27, x28" "\n"
     423    "bne     " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineError) "\n"
     424
     425    "mov       lr, #" STRINGIZE_VALUE_OF(STATE_PC_CHANGED) "\n"
     426
     427    LOCAL_LABEL_STRING(ctiMasmProbeTrampolinePrepareOutgoingRecords) ":" "\n"
     428
     429    "ldr       fp, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_RETURN_PC_OFFSET) "]" "\n" // Preload the probe return site pc.
     430
     431    // The probe handler may have moved the sp. For the return process, we may need
     432    // space for 2 OutgoingProbeRecords below the final sp value. We need to make
     433    // sure that the space for these 2 OutgoingProbeRecords does not overlap the
     434    // restore values of the registers.
     435
     436    // All restore values are located at offset <= PROBE_CPU_FPSR_OFFSET. Hence,
     437    // we need to make sure that resultant sp > offset of fpsr + 2 * sizeof(OutgoingProbeRecord).
     438
     439    "add       x27, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FPSR_OFFSET + 2 * OUT_SIZE) "\n"
     440    "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
     441    "cmp       x28, x27" "\n"
     442    "bgt     " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineFillOutgoingProbeRecords) "\n"
     443
     444    // There is overlap. We need to copy the ProbeContext to a safe area first.
     445    // Let's locate the "safe area" at 2x sizeof(ProbeContext) below where the OutgoingProbeRecords are.
     446    // This ensures that:
     447    // 1. The safe area does not overlap the OutgoingProbeRecords.
     448    // 2. The safe area does not overlap the ProbeContext.
     449
     450    // x28 already contains [sp, #STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET)].
     451    "sub       x28, x28, #" STRINGIZE_VALUE_OF(2 * PROBE_SIZE) "\n"
     452
     453    "mov       x27, sp" "\n" // Save the original ProbeContext*.
     454
     455    // Make sure the stack pointer points to the safe area. This ensures that the
     456    // safe area is protected from interrupt handlers overwriting it.
     457    "mov       sp, x28" "\n" // sp now points to the new ProbeContext in the safe area.
     458
     459    // Copy the relevant restore data to the new ProbeContext*.
     460    "str       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n" // Stash the pc changed state away so that we can use lr.
     461
     462    "ldp       x28, lr, [x27, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n" // copy x27 and x28.
     463    "stp       x28, lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n"
     464    "ldp       x28, lr, [x27, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n" // copy fp and lr.
     465    "stp       x28, lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n"
     466    "ldp       x28, lr, [x27, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n" // copy sp and pc.
     467    "stp       x28, lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
     468    "ldp       x28, lr, [x27, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n" // copy nzcv and fpsr.
     469    "stp       x28, lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n"
     470
     471    "ldr       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X0_OFFSET) "]" "\n" // Retrieve the stashed pc changed state.
     472
     473    LOCAL_LABEL_STRING(ctiMasmProbeTrampolineFillOutgoingProbeRecords) ":" "\n"
     474
     475    "cbnz       lr, " LOCAL_LABEL_STRING(ctiMasmProbeTrampolineEnd) "\n" // Skip lr restoration setup if state (in lr) == STATE_PC_CHANGED.
     476
     477    // In order to restore lr, we need to do the restoration at the probe return site.
     478    // The probe return site expects sp to be pointing at an OutgoingProbeRecord such that
     479    // popping the OutgoingProbeRecord will yield the desired sp. The probe return site
     480    // also expects the lr restore value to be stashed in the OutgoingProbeRecord.
     481    // We can make this happen by pushing 2 OutgoingProbeRecords instead of 1:
     482    // 1 for the probe return site, and 1 at ctiMasmProbeTrampolineEnd for returning from
     483    // this probe.
     484
     485    // Fill in the OutgoingProbeRecord for the probe return site.
     486    "ldr       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
     487    "sub       lr, lr, #" STRINGIZE_VALUE_OF(OUT_SIZE) "\n"
     488
     489    "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n"
     490    "str       x27, [lr, #" STRINGIZE_VALUE_OF(OUT_LR_OFFSET) "]" "\n"
     491
     492    // Set up the sp and pc values so that ctiMasmProbeTrampolineEnd will return to the probe return site.
     493    "str       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
     494    "str       fp, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_PC_OFFSET) "]" "\n" // Store the probe return site pc (preloaded into fp above).
     495
     496    LOCAL_LABEL_STRING(ctiMasmProbeTrampolineEnd) ":" "\n"
     497
     498    // Fill in the OutgoingProbeRecord.
     499    "ldr       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_SP_OFFSET) "]" "\n"
     500    "sub       lr, lr, #" STRINGIZE_VALUE_OF(OUT_SIZE) "\n"
     501
     502    "ldp       x27, x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_NZCV_OFFSET) "]" "\n"
     503    "stp       x27, x28, [lr, #" STRINGIZE_VALUE_OF(OUT_NZCV_OFFSET) "]" "\n"
     504    "ldp       x27, x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n"
     505    "stp       x27, x28, [lr, #" STRINGIZE_VALUE_OF(OUT_X27_OFFSET) "]" "\n"
     506    "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n"
     507    "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_PC_OFFSET) "]" "\n"
     508    "stp       x27, x28, [lr, #" STRINGIZE_VALUE_OF(OUT_FP_OFFSET) "]" "\n"
     509    "mov       sp, lr" "\n"
     510
     511    // Restore the remaining registers and pop the OutgoingProbeRecord.
     512    "ldp       x27, x28, [sp], #" STRINGIZE_VALUE_OF(2 * PTR_SIZE) "\n"
    419513    "msr       nzcv, x27" "\n"
    420     "ldr       fp, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_FP_OFFSET) "]" "\n"
    421 
    422     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n"
    423     "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
    424 
    425     // There are 4 more registers left to restore: x27, x28, lr, sp, and pc.
    426     // The JIT code's lr and sp will be restored by the caller.
    427 
    428     // Restore pc by loading it into lr. The ret below will put in the pc.
    429     "ldr       lr, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_PC_OFFSET) "]" "\n"
    430 
    431     // We need x27 as a scratch register to help with popping the ProbeContext.
    432     // Hence, before we pop the ProbeContext, we need to copy the restore value
    433     // for x27 from the ProbeContext to the caller save buffer.
    434     "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(SAVED_CALLER_SP) "]" "\n"
    435     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X27_OFFSET) "]" "\n"
    436     "str       x27, [x28, #4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    437 
    438     // Since lr is also restored by the caller, we need to copy its restore
    439     // value to the caller save buffer too.
    440     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_LR_OFFSET) "]" "\n"
    441     "str       x27, [x28, #6 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    442 
    443     // We're now done with x28, and can restore its value.
    444     "ldr       x28, [sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_X28_OFFSET) "]" "\n"
    445 
    446     // We're now done with the ProbeContext, and can pop it to restore sp so that
    447     // it points to the caller save buffer.
    448     "ldr       x27, [sp, #" STRINGIZE_VALUE_OF(SAVED_CALLER_SP) "]" "\n"
    449     "mov       sp, x27" "\n"
    450 
    451     // We're now done with x27, and can restore it.
    452     "ldr       x27, [sp, #4 * " STRINGIZE_VALUE_OF(PTR_SIZE) "]" "\n"
    453 
     514    "msr       fpsr, x28" "\n"
     515    "ldp       x27, x28, [sp], #" STRINGIZE_VALUE_OF(2 * PTR_SIZE) "\n"
     516    "ldp       fp, lr, [sp], #" STRINGIZE_VALUE_OF(2 * PTR_SIZE) "\n"
    454517    "ret" "\n"
     518
     519    LOCAL_LABEL_STRING(ctiMasmProbeTrampolineError) ":" "\n"
     520    // The probe handler changed both lr and pc. This is not supported for ARM64.
     521    "ldr       x1, [sp, #" STRINGIZE_VALUE_OF(SAVED_PROBE_ERROR_FUNCTION_OFFSET) "]" "\n"
     522    "mov       x0, sp" "\n" // Set the ProbeContext* arg.
     523    "blr       x1" "\n"
     524    "brk       #0x1000" // Should never return here.
    455525);
    456526#endif // COMPILER(GCC_OR_CLANG)
    457527
    458 static void arm64ProbeTrampoline(ProbeContext* context)
     528static NO_RETURN_DUE_TO_CRASH void arm64ProbeError(ProbeContext*)
    459529{
    460     void* origSP = context->cpu.sp();
    461     void* origPC = context->cpu.pc();
    462    
    463     context->probeFunction(context);
    464    
    465     if (context->cpu.sp() != origSP) {
    466         dataLog("MacroAssembler probe ERROR: ARM64 does not support the probe changing the SP. The change will be ignored\n");
    467         context->cpu.sp() = origSP;
    468     }
    469     if (context->cpu.pc() != origPC) {
    470         dataLog("MacroAssembler probe ERROR: ARM64 does not support the probe changing the PC. The change will be ignored\n");
    471         context->cpu.pc() = origPC;
    472     }
     530    dataLog("MacroAssembler probe ERROR: ARM64 does not support the probe changing both LR and PC.\n");
     531    RELEASE_ASSERT_NOT_REACHED();
    473532}
    474533
    475534void MacroAssembler::probe(ProbeFunction function, void* arg)
    476535{
    477     sub64(TrustedImm32(7 * 8), sp);
    478 
    479     store64(x27, Address(sp, 3 * 8));
    480     store64(x28, Address(sp, 4 * 8));
    481     store64(lr, Address(sp, 5 * 8));
    482 
    483     add64(TrustedImm32(7 * 8), sp, x28);
    484     store64(x28, Address(sp, 6 * 8)); // Save original sp value.
    485 
    486     move(TrustedImmPtr(reinterpret_cast<void*>(function)), x28);
    487     store64(x28, Address(sp));
    488     move(TrustedImmPtr(arg), x28);
    489     store64(x28, Address(sp, 1 * 8));
    490     move(TrustedImmPtr(reinterpret_cast<void*>(arm64ProbeTrampoline)), x28);
    491     store64(x28, Address(sp, 2 * 8));
    492 
    493     move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), x28);
    494     m_assembler.blr(x28);
    495 
    496     // ctiMasmProbeTrampoline should have restored every register except for
    497     // lr and the sp.
    498     load64(Address(sp, 5 * 8), lr);
    499     add64(TrustedImm32(7 * 8), sp);
     536    sub64(TrustedImm32(sizeof(IncomingProbeRecord)), sp);
     537
     538    storePair64(x26, x27, sp, TrustedImm32(offsetof(IncomingProbeRecord, x26)));
     539    add64(TrustedImm32(sizeof(IncomingProbeRecord)), sp, x26);
     540    storePair64(lr, x26, sp, TrustedImm32(offsetof(IncomingProbeRecord, lr))); // Save lr and original sp value.
     541    move(TrustedImmPtr(reinterpret_cast<void*>(function)), x26);
     542    move(TrustedImmPtr(arg), x27);
     543    storePair64(x26, x27, sp, TrustedImm32(offsetof(IncomingProbeRecord, probeHandlerFunction)));
     544    move(TrustedImmPtr(reinterpret_cast<void*>(arm64ProbeError)), x27);
     545    store64(x27, Address(sp, offsetof(IncomingProbeRecord, probeErrorFunction)));
     546
     547    move(TrustedImmPtr(reinterpret_cast<void*>(ctiMasmProbeTrampoline)), x26);
     548    m_assembler.blr(x26);
     549
     550    // ctiMasmProbeTrampoline should have restored every register except for lr and the sp.
     551    load64(Address(sp, offsetof(OutgoingProbeRecord, lr)), lr);
     552    add64(TrustedImm32(sizeof(OutgoingProbeRecord)), sp);
    500553}
    501554#endif // ENABLE(MASM_PROBE)
  • trunk/Source/JavaScriptCore/assembler/testmasm.cpp

    r219885 r219951  
    7272#define testWord(x) testWord32(x)
    7373#endif
     // Produces the test value for x as a double: the integer returned by
     // testWord(x) converted by value with static_cast (not a bit reinterpretation).
     74#define testDoubleWord(x) static_cast<double>(testWord(x))
    7475
    7576// Nothing fancy for now; we just use the existing WTF assertion machinery.
     
    8283    } while (false)
    8384
     85#define CHECK_DOUBLE_BITWISE_EQ(a, b) \
     86    CHECK(bitwise_cast<uint64_t>(a) == bitwise_cast<uint64_t>(a))
     87
    8488#if ENABLE(MASM_PROBE)
    8589bool isPC(MacroAssembler::RegisterID id)
     
    101105{
    102106    return id == MacroAssembler::framePointerRegister;
     107}
     108
     // Returns true for general purpose registers that the probe tests skip when
     // writing or checking test values: pc, sp and fp on every target, plus x18
     // when building for ARM64.
     109bool isSpecialGPR(MacroAssembler::RegisterID id)
     110{
     111    if (isPC(id) || isSP(id) || isFP(id))
     112        return true;
     113#if CPU(ARM64)
     // NOTE(review): presumably x18 is excluded because it is a platform-reserved
     // register on ARM64 — confirm against the target ABI.
     114    if (id == ARM64Registers::x18)
     115        return true;
     116#endif
     117    return false;
    103118}
    104119#endif // ENABLE(MASM_PROBE)
     
    165180            CHECK(context->gpr(GPRInfo::argumentGPR3) == testWord(3));
    166181
    167             CHECK(context->fpr(FPRInfo::fpRegT0) == testWord32(0));
    168             CHECK(context->fpr(FPRInfo::fpRegT1) == testWord32(1));
     182            CHECK_DOUBLE_BITWISE_EQ(context->fpr(FPRInfo::fpRegT0), static_cast<double>(testWord32(0)));
     183            CHECK_DOUBLE_BITWISE_EQ(context->fpr(FPRInfo::fpRegT1),  static_cast<double>(testWord32(1)));
    169184        });
    170185        jit.emitFunctionEpilogue();
     
    218233            CHECK(context->gpr(GPRInfo::argumentGPR3) == testWord(3));
    219234
    220             CHECK(context->fpr(FPRInfo::fpRegT0) == testWord32(0));
    221             CHECK(context->fpr(FPRInfo::fpRegT1) == testWord32(1));
     235            CHECK_DOUBLE_BITWISE_EQ(context->fpr(FPRInfo::fpRegT0), static_cast<double>(testWord32(0)));
     236            CHECK_DOUBLE_BITWISE_EQ(context->fpr(FPRInfo::fpRegT1), static_cast<double>(testWord32(1)));
    222237        });
    223238
     
    257272            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    258273                originalState.gpr(id) = context->gpr(id);
    259                 if (isPC(id) || isSP(id) || isFP(id))
     274                if (isSpecialGPR(id))
    260275                    continue;
    261276                context->gpr(id) = testWord(static_cast<int>(id));
     
    263278            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id)) {
    264279                originalState.fpr(id) = context->fpr(id);
    265                 context->fpr(id) = testWord(id);
     280                context->fpr(id) = testDoubleWord(id);
    266281            }
    267282        });
     
    278293            probeCallCount++;
    279294            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    280                 if (isPC(id))
    281                     continue;
    282295                if (isSP(id) || isFP(id)) {
    283296                    CHECK(context->gpr(id) == originalState.gpr(id));
    284297                    continue;
    285298                }
     299                if (isSpecialGPR(id))
     300                    continue;
    286301                CHECK(context->gpr(id) == testWord(id));
    287302            }
    288303            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id))
    289                 CHECK(context->fpr(id) == testWord(id));
     304                CHECK_DOUBLE_BITWISE_EQ(context->fpr(id), testDoubleWord(id));
    290305        });
    291306
     
    294309            probeCallCount++;
    295310            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    296                 if (isPC(id) || isSP(id) || isFP(id))
     311                if (isSpecialGPR(id))
    297312                    continue;
    298313                context->gpr(id) = originalState.gpr(id);
     
    306321            probeCallCount++;
    307322            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    308                 if (isPC(id) || isSP(id) || isFP(id))
     323                if (isSpecialGPR(id))
    309324                    continue;
    310325                CHECK(context->gpr(id) == originalState.gpr(id));
    311326            }
    312327            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id))
    313                 CHECK(context->fpr(id) == originalState.fpr(id));
     328                CHECK_DOUBLE_BITWISE_EQ(context->fpr(id), originalState.fpr(id));
    314329        });
    315330
     
    326341    uint8_t* originalSP { nullptr };
    327342    void* modifiedSP { nullptr };
    328 #if CPU(X86) || CPU(X86_64) || CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
    329343    uintptr_t modifiedFlags { 0 };
     344   
     345#if CPU(X86) || CPU(X86_64)
     346    auto flagsSPR = X86Registers::eflags;
     347    uintptr_t flagsMask = 0xc5;
     348#elif CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
     349    auto flagsSPR = ARMRegisters::apsr;
     350    uintptr_t flagsMask = 0xf0000000;
     351#elif CPU(ARM64)
     352    auto flagsSPR = ARM64Registers::nzcv;
     353    uintptr_t flagsMask = 0xf0000000;
    330354#endif
    331355
     
    339363            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    340364                originalState.gpr(id) = context->gpr(id);
    341                 if (isPC(id) || isSP(id) || isFP(id))
     365                if (isSpecialGPR(id))
    342366                    continue;
    343367                context->gpr(id) = testWord(static_cast<int>(id));
     
    347371                context->fpr(id) = testWord(id);
    348372            }
    349 #if CPU(X86) || CPU(X86_64)
    350             originalState.spr(X86Registers::eflags) = context->spr(X86Registers::eflags);
    351             modifiedFlags = originalState.spr(X86Registers::eflags) ^ 0xc5;
    352             context->spr(X86Registers::eflags) = modifiedFlags;
    353 #elif CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
    354             originalState.spr(ARMRegisters::apsr) = context->spr(ARMRegisters::apsr);
    355             modifiedFlags = originalState.spr(ARMRegisters::apsr) ^ 0xf0000000;
    356             context->spr(ARMRegisters::apsr) = modifiedFlags;
    357 #endif
     373
     374            originalState.spr(flagsSPR) = context->spr(flagsSPR);
     375            modifiedFlags = originalState.spr(flagsSPR) ^ flagsMask;
     376            context->spr(flagsSPR) = modifiedFlags;
     377
    358378            originalSP = reinterpret_cast<uint8_t*>(context->sp());
    359379            modifiedSP = computeModifiedStack(context);
     
    365385            probeCallCount++;
    366386            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    367                 if (isPC(id) || isSP(id))
    368                     continue;
    369387                if (isFP(id)) {
    370388                    CHECK(context->gpr(id) == originalState.gpr(id));
    371389                    continue;
    372390                }
     391                if (isSpecialGPR(id))
     392                    continue;
    373393                CHECK(context->gpr(id) == testWord(id));
    374394            }
    375395            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id))
    376                 CHECK(context->fpr(id) == testWord(id));
    377 #if CPU(X86) || CPU(X86_64)
    378             CHECK(context->spr(X86Registers::eflags) == modifiedFlags);
    379 #elif CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
    380             CHECK(context->spr(ARMRegisters::apsr) == modifiedFlags);
    381 #endif
     396                CHECK_DOUBLE_BITWISE_EQ(context->fpr(id), testDoubleWord(id));
     397            CHECK(context->spr(flagsSPR) == modifiedFlags);
    382398            CHECK(context->sp() == modifiedSP);
    383399        });
     
    387403            probeCallCount++;
    388404            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    389                 if (isPC(id) || isSP(id) || isFP(id))
     405                if (isSpecialGPR(id))
    390406                    continue;
    391407                context->gpr(id) = originalState.gpr(id);
     
    393409            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id))
    394410                context->fpr(id) = originalState.fpr(id);
    395 #if CPU(X86) || CPU(X86_64)
    396             context->spr(X86Registers::eflags) = originalState.spr(X86Registers::eflags);
    397 #elif CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
    398             context->spr(ARMRegisters::apsr) = originalState.spr(ARMRegisters::apsr);
    399 #endif
     411            context->spr(flagsSPR) = originalState.spr(flagsSPR);
    400412            context->sp() = originalSP;
    401413        });
     
    405417            probeCallCount++;
    406418            for (auto id = CCallHelpers::firstRegister(); id <= CCallHelpers::lastRegister(); id = nextID(id)) {
    407                 if (isPC(id) || isSP(id) || isFP(id))
     419                if (isSpecialGPR(id))
    408420                    continue;
    409421                CHECK(context->gpr(id) == originalState.gpr(id));
    410422            }
    411423            for (auto id = CCallHelpers::firstFPRegister(); id <= CCallHelpers::lastFPRegister(); id = nextID(id))
    412                 CHECK(context->fpr(id) == originalState.fpr(id));
    413 #if CPU(X86) || CPU(X86_64)
    414             CHECK(context->spr(X86Registers::eflags) == originalState.spr(X86Registers::eflags));
    415 #elif CPU(ARM_THUMB2) || CPU(ARM_TRADITIONAL)
    416             CHECK(context->spr(ARMRegisters::apsr) == originalState.spr(ARMRegisters::apsr));
    417 #endif
     424                CHECK_DOUBLE_BITWISE_EQ(context->fpr(id),  originalState.fpr(id));
     425            CHECK(context->spr(flagsSPR) == originalState.spr(flagsSPR));
    418426            CHECK(context->sp() == originalSP);
    419427        });
     
    427435void testProbeModifiesStackPointerToInsideProbeContextOnStack()
    428436{
    429     for (size_t offset = 0; offset < sizeof(ProbeContext); offset += sizeof(uintptr_t)) {
     437    size_t increment = sizeof(uintptr_t);
     438#if CPU(ARM64)
     439    // The ARM64 probe uses ldp and stp which require 16 byte alignment.
     440    increment = 2 * sizeof(uintptr_t);
     441#endif
     442    for (size_t offset = 0; offset < sizeof(ProbeContext); offset += increment) {
    430443        testProbeModifiesStackPointer([=] (ProbeContext* context) -> void* {
    431444            return reinterpret_cast<uint8_t*>(context) + offset;
     
    436449void testProbeModifiesStackPointerToNBytesBelowSP()
    437450{
    438     for (size_t offset = 0; offset < 1 * KB; offset += sizeof(uintptr_t)) {
     451    size_t increment = sizeof(uintptr_t);
     452#if CPU(ARM64)
     453    // The ARM64 probe uses ldp and stp which require 16 byte alignment.
     454    increment = 2 * sizeof(uintptr_t);
     455#endif
     456    for (size_t offset = 0; offset < 1 * KB; offset += increment) {
    439457        testProbeModifiesStackPointer([=] (ProbeContext* context) -> void* {
    440458            return reinterpret_cast<uint8_t*>(context->cpu.sp()) - offset;
Note: See TracChangeset for help on using the changeset viewer.