Changeset 220871 in webkit


Ignore:
Timestamp:
Aug 17, 2017 12:57:46 PM (7 years ago)
Author:
mark.lam@apple.com
Message:

Only use 16 VFP registers if !CPU(ARM_NEON).
https://bugs.webkit.org/show_bug.cgi?id=175514

Reviewed by JF Bastien.

Source/JavaScriptCore:

Deleted q16-q31 FPQuadRegisterID enums in ARMv7Assembler.h. The NEON spec
says that there are only 16 128-bit NEON registers. This change is merely to
correct the code documentation of these registers. The FPQuadRegisterID enums are
currently unused.

  • assembler/ARMAssembler.h:

(JSC::ARMAssembler::lastFPRegister):
(JSC::ARMAssembler::fprName):

  • assembler/ARMv7Assembler.h:

(JSC::ARMv7Assembler::lastFPRegister):
(JSC::ARMv7Assembler::fprName):

  • assembler/MacroAssemblerARM.cpp:
  • assembler/MacroAssemblerARMv7.cpp:

Source/WTF:

If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 support is
available. Hence, only the first 16 FPDoubleRegisterIDs are available.

For reference, see:
NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html

This is mostly for GTK toolchains which may target older ARM CPUs which only have
VFP2 support.

  • wtf/Platform.h:
Location:
trunk/Source
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/JavaScriptCore/ChangeLog

    r220870 r220871  
     12017-08-17  Mark Lam  <mark.lam@apple.com>
     2
     3        Only use 16 VFP registers if !CPU(ARM_NEON).
     4        https://bugs.webkit.org/show_bug.cgi?id=175514
     5
     6        Reviewed by JF Bastien.
     7
     8        Deleted q16-q31 FPQuadRegisterID enums in ARMv7Assembler.h.  The NEON spec
     9        says that there are only 16 128-bit NEON registers.  This change is merely to
     10        correct the code documentation of these registers.  The FPQuadRegisterID enums are
     11        currently unused.
     12
     13        * assembler/ARMAssembler.h:
     14        (JSC::ARMAssembler::lastFPRegister):
     15        (JSC::ARMAssembler::fprName):
     16        * assembler/ARMv7Assembler.h:
     17        (JSC::ARMv7Assembler::lastFPRegister):
     18        (JSC::ARMv7Assembler::fprName):
     19        * assembler/MacroAssemblerARM.cpp:
     20        * assembler/MacroAssemblerARMv7.cpp:
     21
    1222017-08-17  Andreas Kling  <akling@apple.com>
    223
  • trunk/Source/JavaScriptCore/assembler/ARMAssembler.h

    r219740 r220871  
    8888            d14,
    8989            d15,
     90#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    9091            d16,
    9192            d17,
     
    104105            d30,
    105106            d31,
     107#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    106108
    107109            // Pseudonyms for some of the registers.
     
    135137
    136138        static constexpr FPRegisterID firstFPRegister() { return ARMRegisters::d0; }
     139#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    137140        static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d31; }
     141#else
     142        static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d15; }
     143#endif
    138144        static constexpr unsigned numberOfFPRegisters() { return lastFPRegister() - firstFPRegister() + 1; }
    139145
     
    167173                "d8", "d9", "d10", "d11",
    168174                "d12", "d13", "d14", "d15",
     175#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    169176                "d16", "d17", "d18", "d19",
    170177                "d20", "d21", "d22", "d23",
    171178                "d24", "d25", "d26", "d27",
    172179                "d28", "d29", "d30", "d31"
     180#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    173181            };
    174182            return nameForRegister[id];
  • trunk/Source/JavaScriptCore/assembler/ARMv7Assembler.h

    r219740 r220871  
    124124        d14,
    125125        d15,
     126#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    126127        d16,
    127128        d17,
     
    140141        d30,
    141142        d31,
     143#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    142144    } FPDoubleRegisterID;
    143145
     146#if CPU(ARM_NEON)
    144147    typedef enum {
    145148        q0,
     
    159162        q14,
    160163        q15,
    161         q16,
    162         q17,
    163         q18,
    164         q19,
    165         q20,
    166         q21,
    167         q22,
    168         q23,
    169         q24,
    170         q25,
    171         q26,
    172         q27,
    173         q28,
    174         q29,
    175         q30,
    176         q31,
    177164    } FPQuadRegisterID;
     165#endif // CPU(ARM_NEON)
    178166
    179167    inline FPSingleRegisterID asSingle(FPDoubleRegisterID reg)
     
    434422    typedef ARMRegisters::FPSingleRegisterID FPSingleRegisterID;
    435423    typedef ARMRegisters::FPDoubleRegisterID FPDoubleRegisterID;
     424#if CPU(ARM_NEON)
    436425    typedef ARMRegisters::FPQuadRegisterID FPQuadRegisterID;
     426#endif
    437427    typedef ARMRegisters::SPRegisterID SPRegisterID;
    438428    typedef FPDoubleRegisterID FPRegisterID;
     
    447437
    448438    static constexpr FPRegisterID firstFPRegister() { return ARMRegisters::d0; }
     439#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    449440    static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d31; }
     441#else
     442    static constexpr FPRegisterID lastFPRegister() { return ARMRegisters::d15; }
     443#endif
    450444    static constexpr unsigned numberOfFPRegisters() { return lastFPRegister() - firstFPRegister() + 1; }
    451445
     
    479473            "d8", "d9", "d10", "d11",
    480474            "d12", "d13", "d14", "d15",
     475#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    481476            "d16", "d17", "d18", "d19",
    482477            "d20", "d21", "d22", "d23",
    483478            "d24", "d25", "d26", "d27",
    484479            "d28", "d29", "d30", "d31"
     480#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    485481        };
    486482        return nameForRegister[id];
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARM.cpp

    r220850 r220871  
    154154#define PROBE_CPU_D14_OFFSET (PROBE_FIRST_FPREG_OFFSET + (14 * FPREG_SIZE))
    155155#define PROBE_CPU_D15_OFFSET (PROBE_FIRST_FPREG_OFFSET + (15 * FPREG_SIZE))
     156
     157#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    156158#define PROBE_CPU_D16_OFFSET (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
    157159#define PROBE_CPU_D17_OFFSET (PROBE_FIRST_FPREG_OFFSET + (17 * FPREG_SIZE))
     
    172174
    173175#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (32 * FPREG_SIZE))
     176#else
     177#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
     178#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    174179
    175180#define OUT_SIZE GPREG_SIZE
     
    223228COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d14]) == PROBE_CPU_D14_OFFSET, ProbeContext_cpu_d14_offset_matches_ctiMasmProbeTrampoline);
    224229COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d15]) == PROBE_CPU_D15_OFFSET, ProbeContext_cpu_d15_offset_matches_ctiMasmProbeTrampoline);
     230
     231#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    225232COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d16]) == PROBE_CPU_D16_OFFSET, ProbeContext_cpu_d16_offset_matches_ctiMasmProbeTrampoline);
    226233COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d17]) == PROBE_CPU_D17_OFFSET, ProbeContext_cpu_d17_offset_matches_ctiMasmProbeTrampoline);
     
    239246COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d30]) == PROBE_CPU_D30_OFFSET, ProbeContext_cpu_d30_offset_matches_ctiMasmProbeTrampoline);
    240247COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d31]) == PROBE_CPU_D31_OFFSET, ProbeContext_cpu_d31_offset_matches_ctiMasmProbeTrampoline);
     248#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    241249
    242250COMPILE_ASSERT(sizeof(ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
     
    292300    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D0_OFFSET) "\n"
    293301    "vstmia.64 ip!, { d0-d15 }" "\n"
     302#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    294303    "vstmia.64 ip!, { d16-d31 }" "\n"
    295 
     304#endif
    296305    "mov       fp, sp" "\n" // Save the ProbeContext*.
    297306
     
    350359    // out of the ProbeContext before returning.
    351360
     361#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    352362    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D31_OFFSET + FPREG_SIZE) "\n"
    353363    "vldmdb.64 ip!, { d16-d31 }" "\n"
    354364    "vldmdb.64 ip!, { d0-d15 }" "\n"
     365#else
     366    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D15_OFFSET + FPREG_SIZE) "\n"
     367    "vldmdb.64 ip!, { d0-d15 }" "\n"
     368#endif
     369
    355370    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET + GPREG_SIZE) "\n"
    356371    "ldmdb     ip, { r0-r11 }" "\n"
  • trunk/Source/JavaScriptCore/assembler/MacroAssemblerARMv7.cpp

    r220823 r220871  
    9090#define PROBE_CPU_D14_OFFSET (PROBE_FIRST_FPREG_OFFSET + (14 * FPREG_SIZE))
    9191#define PROBE_CPU_D15_OFFSET (PROBE_FIRST_FPREG_OFFSET + (15 * FPREG_SIZE))
     92
     93#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    9294#define PROBE_CPU_D16_OFFSET (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
    9395#define PROBE_CPU_D17_OFFSET (PROBE_FIRST_FPREG_OFFSET + (17 * FPREG_SIZE))
     
    108110
    109111#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (32 * FPREG_SIZE))
     112#else
     113#define PROBE_SIZE (PROBE_FIRST_FPREG_OFFSET + (16 * FPREG_SIZE))
     114#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    110115
    111116#define OUT_SIZE GPREG_SIZE
     
    160165COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d15]) == PROBE_CPU_D15_OFFSET, ProbeContext_cpu_d15_offset_matches_ctiMasmProbeTrampoline);
    161166
     167#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    162168COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d16]) == PROBE_CPU_D16_OFFSET, ProbeContext_cpu_d16_offset_matches_ctiMasmProbeTrampoline);
    163169COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d17]) == PROBE_CPU_D17_OFFSET, ProbeContext_cpu_d17_offset_matches_ctiMasmProbeTrampoline);
     
    176182COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d30]) == PROBE_CPU_D30_OFFSET, ProbeContext_cpu_d30_offset_matches_ctiMasmProbeTrampoline);
    177183COMPILE_ASSERT(PROBE_OFFSETOF(cpu.fprs[ARMRegisters::d31]) == PROBE_CPU_D31_OFFSET, ProbeContext_cpu_d31_offset_matches_ctiMasmProbeTrampoline);
     184#endif // CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    178185
    179186COMPILE_ASSERT(sizeof(ProbeContext) == PROBE_SIZE, ProbeContext_size_matches_ctiMasmProbeTrampoline);
    180187#undef PROBE_OFFSETOF
    181    
     188
    182189asm (
    183190    ".text" "\n"
     
    231238    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D0_OFFSET) "\n"
    232239    "vstmia.64 ip!, { d0-d15 }" "\n"
     240#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    233241    "vstmia.64 ip!, { d16-d31 }" "\n"
    234 
     242#endif
    235243    "mov       fp, sp" "\n" // Save the ProbeContext*.
    236244
     
    290298    // out of the ProbeContext before returning.
    291299
     300#if CPU(ARM_NEON) || CPU(ARM_VFP_V3_D32)
    292301    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D31_OFFSET + FPREG_SIZE) "\n"
    293302    "vldmdb.64 ip!, { d16-d31 }" "\n"
    294303    "vldmdb.64 ip!, { d0-d15 }" "\n"
     304#else
     305    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_D15_OFFSET + FPREG_SIZE) "\n"
     306    "vldmdb.64 ip!, { d0-d15 }" "\n"
     307#endif
    295308
    296309    "add       ip, sp, #" STRINGIZE_VALUE_OF(PROBE_CPU_R11_OFFSET + GPREG_SIZE) "\n"
  • trunk/Source/WTF/ChangeLog

    r220823 r220871  
     12017-08-17  Mark Lam  <mark.lam@apple.com>
     2
     3        Only use 16 VFP registers if !CPU(ARM_NEON).
     4        https://bugs.webkit.org/show_bug.cgi?id=175514
     5
     6        Reviewed by JF Bastien.
     7
     8        If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 support is
     9        available. Hence, only the first 16 FPDoubleRegisterIDs are available.
     10
     11        For reference, see:
     12        NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
     13        VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
     14        NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html
     15
     16        This is mostly for GTK toolchains which may target older ARM CPUs which only have
     17        VFP2 support.
     18
     19        * wtf/Platform.h:
     20
    1212017-08-16  Mark Lam  <mark.lam@apple.com>
    222
  • trunk/Source/WTF/wtf/Platform.h

    r220823 r220871  
    340340#endif
    341341
     342/* If CPU(ARM_NEON) is not enabled, we'll conservatively assume only VFP2 or VFPv3D16
     343   support is available. Hence, only the first 16 64-bit floating point registers
     344   are available. See:
     345   NEON registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJACABEJ.html
     346   VFP2 and VFP3 registers: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CIHDIBDG.html
     347   NEON to VFP register mapping: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0473c/CJAIJHFC.html
     348*/
     349#if CPU(ARM_NEON)
     350#define WTF_CPU_ARM_VFP_V3_D32 1
     351#else
     352#define WTF_CPU_ARM_VFP_V2 1
     353#endif
     354
    342355#if defined(__ARM_ARCH_7K__)
    343356#define WTF_CPU_APPLE_ARMV7K 1
Note: See TracChangeset for help on using the changeset viewer.