Changeset 286587 in webkit


Ignore:
Timestamp:
Dec 6, 2021 9:52:22 PM (2 years ago)
Author:
fpizlo@apple.com
Message:

[libpas] Clean up what the machine code looks like under LTO
https://bugs.webkit.org/show_bug.cgi?id=233909

Reviewed by Yusuke Suzuki.

During the very painful perf burndown of libpas that got it to be a progression (rather than a
regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
better when they do not have a stack frame. For this to happen, they must only make tail calls.

Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
assertion, and the call to pas_assertion_failed was not a tail call.

This fixes the problem comprehensively:

  • We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for code size and maybe it could sometimes be better for performance.
  • The slow path that was getting inlined is now marked PAS_NEVER_INLINE.
  • Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of places, and also marked them NEVER_INLINE.

If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.

  • libpas/src/libpas/pas_lock.c:

(pas_lock_lock_slow):

  • libpas/src/libpas/pas_lock.h:
  • libpas/src/libpas/pas_monotonic_time.c:

(get_timebase_info_slow):
(get_timebase_info):

  • libpas/src/libpas/pas_thread_local_cache.c:

(pas_thread_local_cache_append_deallocation_slow):

  • libpas/src/libpas/pas_thread_local_cache.h:
  • libpas/src/libpas/pas_utils.c:
  • libpas/src/libpas/pas_utils.h:

(pas_assertion_failed):
(pas_assertion_failed_noreturn_silencer):

Location:
trunk/Source/bmalloc
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/bmalloc/ChangeLog

    r286573 r286587  
     12021-12-06  Filip Pizlo  <fpizlo@apple.com>
     2
     3        [libpas] Clean up what the machine code looks like under LTO
     4        https://bugs.webkit.org/show_bug.cgi?id=233909
     5
     6        Reviewed by Yusuke Suzuki.
     7
     8        During the very painful perf burndown of libpas that got it to be a progression (rather than a
     9        regression) versus bmalloc, I found that certain key fast paths - like fastMalloc and fastFree - perform
     10        better when they do not have a stack frame. For this to happen, they must only make tail calls.
     11
     12        Sadly, LTO was inlining a slow path into fastFree (i.e. pas_deallocate), and this slow path had an
     13        assertion, and the call to pas_assertion_failed was not a tail call.
     14
     15        This fixes the problem comprehensively:
     16
     17        - We now compile pas_assertion_failed as a breakpoint, just like WebKit would do. This is better for
     18          code size and maybe it could sometimes be better for performance.
     19
     20        - The slow path that was getting inlined is now marked PAS_NEVER_INLINE.
     21
     22        - Inspecting the machine code, I found a couple other slow paths that were being inlined in a bunch of
     23          places, and also marked them NEVER_INLINE.
     24
     25        If my past experiences are right, this could be a tiny speed-up on malloc-heavy workloads.
     26
     27        * libpas/src/libpas/pas_lock.c:
     28        (pas_lock_lock_slow):
     29        * libpas/src/libpas/pas_lock.h:
     30        * libpas/src/libpas/pas_monotonic_time.c:
     31        (get_timebase_info_slow):
     32        (get_timebase_info):
     33        * libpas/src/libpas/pas_thread_local_cache.c:
     34        (pas_thread_local_cache_append_deallocation_slow):
     35        * libpas/src/libpas/pas_thread_local_cache.h:
     36        * libpas/src/libpas/pas_utils.c:
     37        * libpas/src/libpas/pas_utils.h:
     38        (pas_assertion_failed):
     39        (pas_assertion_failed_noreturn_silencer):
     40
    1412021-12-06  Yusuke Suzuki  <ysuzuki@apple.com>
    242
  • trunk/Source/bmalloc/libpas/src/libpas/pas_lock.c

    r279867 r286587  
    11/*
    2  * Copyright (c) 2020 Apple Inc. All rights reserved.
     2 * Copyright (c) 2020-2021 Apple Inc. All rights reserved.
    33 *
    44 * Redistribution and use in source and binary forms, with or without
     
    3232#if PAS_USE_SPINLOCKS
    3333
    34 void pas_lock_lock_slow(pas_lock* lock)
     34PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock)
    3535{
    3636    static const size_t a_lot = 256;
  • trunk/Source/bmalloc/libpas/src/libpas/pas_lock.h

    r285853 r286587  
    11/*
    2  * Copyright (c) 2018-2020 Apple Inc. All rights reserved.
     2 * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
    33 *
    44 * Redistribution and use in source and binary forms, with or without
     
    7979}
    8080
    81 PAS_API void pas_lock_lock_slow(pas_lock* lock);
     81PAS_API PAS_NEVER_INLINE void pas_lock_lock_slow(pas_lock* lock);
    8282
    8383static inline void pas_lock_lock(pas_lock* lock)
  • trunk/Source/bmalloc/libpas/src/libpas/pas_monotonic_time.c

    r285853 r286587  
    3737static mach_timebase_info_data_t* timebase_info_ptr;
    3838
     39static PAS_NEVER_INLINE mach_timebase_info_data_t* get_timebase_info_slow(void)
     40{
     41    kern_return_t kern_return;
     42    kern_return = mach_timebase_info(&timebase_info);
     43    PAS_ASSERT(kern_return == KERN_SUCCESS);
     44    pas_fence();
     45    timebase_info_ptr = &timebase_info;
     46    return &timebase_info;
     47}
     48
    3949static mach_timebase_info_data_t* get_timebase_info(void)
    4050{
     
    4252
    4353    result = timebase_info_ptr;
    44     if (!result) {
    45         kern_return_t kern_return;
    46         kern_return = mach_timebase_info(&timebase_info);
    47         PAS_ASSERT(kern_return == KERN_SUCCESS);
    48         pas_fence();
    49         timebase_info_ptr = &timebase_info;
    50         result = &timebase_info;
    51     }
     54    if (PAS_LIKELY(result))
     55        return result;
    5256
    53     return result;
     57    return get_timebase_info_slow();
    5458}
    5559
  • trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.c

    r286573 r286587  
    794794}
    795795
    796 void pas_thread_local_cache_append_deallocation_slow(pas_thread_local_cache* thread_local_cache,
    797                                                      uintptr_t begin,
    798                                                      pas_segregated_page_config_kind_and_role kind_and_role)
     796PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
     797    pas_thread_local_cache* thread_local_cache,
     798    uintptr_t begin,
     799    pas_segregated_page_config_kind_and_role kind_and_role)
    799800{
    800801    unsigned index;
  • trunk/Source/bmalloc/libpas/src/libpas/pas_thread_local_cache.h

    r286573 r286587  
    335335                                            pas_lock_hold_mode heap_lock_hold_mode);
    336336
    337 PAS_API void pas_thread_local_cache_append_deallocation_slow(
     337PAS_API PAS_NEVER_INLINE void pas_thread_local_cache_append_deallocation_slow(
    338338    pas_thread_local_cache* thread_local_cache,
    339339    uintptr_t begin,
  • trunk/Source/bmalloc/libpas/src/libpas/pas_utils.c

    r285789 r286587  
    11/*
    2  * Copyright (c) 2018-2019 Apple Inc. All rights reserved.
     2 * Copyright (c) 2018-2021 Apple Inc. All rights reserved.
    33 *
    44 * Redistribution and use in source and binary forms, with or without
     
    4949}
    5050
     51#if PAS_ENABLE_TESTING
    5152void pas_assertion_failed(const char* filename, int line, const char* function, const char* expression)
    5253{
    5354    pas_panic("%s:%d: %s: assertion %s failed.\n", filename, line, function, expression);
    5455}
     56#endif /* PAS_ENABLE_TESTING */
    5557
    5658static void (*deallocation_did_fail_callback)(const char* reason, void* begin);
  • trunk/Source/bmalloc/libpas/src/libpas/pas_utils.h

    r286573 r286587  
    107107                                                                      size_t new_size);
    108108
    109 PAS_API PAS_NO_RETURN void pas_assertion_failed(const char* filename, int line, const char* function, const char* expression);
     109#if PAS_ENABLE_TESTING
     110PAS_API PAS_NO_RETURN void pas_assertion_failed(
     111    const char* filename, int line, const char* function, const char* expression);
     112#else /* PAS_ENABLE_TESTING -> so !PAS_ENABLE_TESTING */
     113static PAS_ALWAYS_INLINE PAS_NO_RETURN void pas_assertion_failed(
     114    const char* filename, int line, const char* function, const char* expression)
     115{
     116    PAS_UNUSED_PARAM(filename);
     117    PAS_UNUSED_PARAM(line);
     118    PAS_UNUSED_PARAM(function);
     119    PAS_UNUSED_PARAM(expression);
     120    __builtin_trap();
     121}
     122#endif /* PAS_ENABLE_TESTING -> so end of !PAS_ENABLE_TESTING */
    110123
    111124PAS_IGNORE_WARNINGS_BEGIN("missing-noreturn")
    112 static inline void pas_assertion_failed_noreturn_silencer(
     125static PAS_ALWAYS_INLINE void pas_assertion_failed_noreturn_silencer(
    113126    const char* filename, int line, const char* function, const char* expression)
    114127{
Note: See TracChangeset for help on using the changeset viewer.