Changeset 222919 in webkit


Timestamp: Oct 5, 2017 10:58:55 AM
Author: Matt Lewis
Message:

Unreviewed, rolling out r222893.

This caused multiple API failures.

Reverted changeset:

"bmalloc mutex should be adaptive"
https://bugs.webkit.org/show_bug.cgi?id=177839
http://trac.webkit.org/changeset/222893

Location: trunk/Source
Files: 8 edited

  • trunk/Source/WTF/ChangeLog

    r222893 → r222919
    +2017-10-05  Matt Lewis  <jlewis3@apple.com>
    +
    +        Unreviewed, rolling out r222893.
    +
    +        This caused multiple API failures.
    +
    +        Reverted changeset:
    +
    +        "bmalloc mutex should be adaptive"
    +        https://bugs.webkit.org/show_bug.cgi?id=177839
    +        http://trac.webkit.org/changeset/222893
    +
     2017-10-04  Filip Pizlo  <fpizlo@apple.com>
     
  • trunk/Source/WTF/wtf/LockAlgorithmInlines.h

    r222893 → r222919
     // the lock algorithm slow path without recompiling the world. Right now this should be included in two
     // places (Lock.cpp and JSCell.cpp).
    -
    -// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
    -// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
    -// reward.
     
     namespace WTF {
  • trunk/Source/WTF/wtf/WordLock.cpp

    r222893 → r222919
     } // anonymous namespace
     
    -// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
    -// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
    -// reward.
    -
     NEVER_INLINE void WordLockBase::lockSlow()
     {
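
Both WTF files above delete the same NOTE about memory ordering. As a minimal illustration of the rule it states, here is a hypothetical slow path (not WebKit code): since the path already makes system calls, the default seq_cst CAS costs nothing measurable, while a weaker ordering is all risk and no reward.

    #include <atomic>
    #include <sched.h>

    // Hypothetical slow path illustrating the deleted NOTE: every atomic
    // operation uses the default std::memory_order_seq_cst, because fence
    // cost is negligible next to sched_yield().
    void lockSlowPathSketch(std::atomic<bool>& locked)
    {
        for (;;) {
            bool expected = false;
            if (locked.compare_exchange_weak(expected, true)) // seq_cst by default
                return;
            sched_yield(); // Lost the race; let the owner run.
        }
    }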
  • trunk/Source/bmalloc/ChangeLog

    r222900 → r222919
    +2017-10-05  Matt Lewis  <jlewis3@apple.com>
    +
    +        Unreviewed, rolling out r222893.
    +
    +        This caused multiple API failures.
    +
    +        Reverted changeset:
    +
    +        "bmalloc mutex should be adaptive"
    +        https://bugs.webkit.org/show_bug.cgi?id=177839
    +        http://trac.webkit.org/changeset/222893
    +
     2017-10-05  Yusuke Suzuki  <utatane.tea@gmail.com>
     
  • trunk/Source/bmalloc/bmalloc/Algorithm.h

    r222893 → r222919
     /*
    - * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
    + * Copyright (C) 2014 Apple Inc. All rights reserved.
      *
      * Redistribution and use in source and binary forms, with or without
    …
     #include "BAssert.h"
     #include <algorithm>
    -#include <atomic>
     #include <cstdint>
     #include <cstddef>
    …
     }
     
    -// We need a CAS API that isn't the badly designed one from C++.
    -template<typename T>
    -bool compareExchangeWeak(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    -{
    -    // They could have made a sensible CAS API, but they didn't. Sneaky fact: for no good reason, the
    -    // C++ API will mutate expected. I think that apologists will say something about how it saves you
    -    // reloading the value around the back of your CAS loop, but that's nonsense. The delay along the
    -    // back edge of a CAS loop has almost no impact on CAS loop performance. More often than not, we
    -    // want to *add* delays to the back edge, not remove them.
    -    return word.compare_exchange_weak(expected, desired, order, std::memory_order_relaxed);
    -}
    -
    -template<typename T>
    -bool compareExchangeStrong(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)
    -{
    -    return word.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed);
    -}
    -
     } // namespace bmalloc
     
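
The wrapper removed here exists because std::atomic's native API takes `expected` by reference and overwrites it when the CAS fails, whereas the bmalloc version took it by value. A standalone demonstration of the standard behavior the removed comment complains about (standard C++ only):

    #include <atomic>
    #include <cassert>

    int main()
    {
        std::atomic<int> word(42);

        // The standard API mutates its 'expected' argument on failure...
        int expected = 0;
        bool ok = word.compare_exchange_strong(expected, 1);
        assert(!ok && expected == 42); // expected was overwritten with 42

        // ...whereas the removed bmalloc wrapper took 'expected' by value,
        // so a failed CAS left the caller's copy untouched:
        //     compareExchangeWeak(word, /*expected*/ 0, /*desired*/ 1);
        return 0;
    }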
  • trunk/Source/bmalloc/bmalloc/PerThread.h

    r222893 → r222919
     };
     
    +#if HAVE_PTHREAD_MACHDEP_H
    +
     class Cache;
     template<typename T> struct PerThreadStorage;
    -
    -#if HAVE_PTHREAD_MACHDEP_H
     
     // For now, we only support PerThread<PerHeapKind<Cache>>. We can expand to other types by
    …
     };
     
    -#endif
    +#else
     
     template<typename T> struct PerThreadStorage {
    …
     template<typename T> pthread_key_t PerThreadStorage<T>::s_key;
     template<typename T> std::once_flag PerThreadStorage<T>::s_onceFlag;
    +
    +#endif
     
     template<typename T>
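
The fallback branch rearranged in this hunk stores per-thread data behind a lazily created pthread_key_t. A minimal sketch of that pattern, assuming a plain int payload (illustrative only; the real PerThreadStorage is templated and allocates through bmalloc itself):

    #include <cstdlib>
    #include <mutex>
    #include <pthread.h>

    // Sketch of pthread_key-based per-thread storage: create the key once,
    // then lazily allocate a slot for each thread on first access. The key's
    // destructor callback frees the slot when the thread exits.
    struct PerThreadSlot {
        static pthread_key_t s_key;
        static std::once_flag s_onceFlag;

        static int* get()
        {
            std::call_once(s_onceFlag, [] {
                pthread_key_create(&s_key, free); // free runs at thread exit
            });
            void* value = pthread_getspecific(s_key);
            if (!value) {
                value = calloc(1, sizeof(int)); // illustrative payload
                pthread_setspecific(s_key, value);
            }
            return static_cast<int*>(value);
        }
    };

    pthread_key_t PerThreadSlot::s_key;
    std::once_flag PerThreadSlot::s_onceFlag;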
  • trunk/Source/bmalloc/bmalloc/StaticMutex.cpp

    r222893 → r222919
     /*
    - * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
    + * Copyright (C) 2014 Apple Inc. All rights reserved.
      *
      * Redistribution and use in source and binary forms, with or without
    …
      */
     
    +#include "ScopeExit.h"
     #include "StaticMutex.h"
    -
    -#include "PerThread.h"
    -#include "ScopeExit.h"
    -#include <condition_variable>
    -#include <mutex>
     #include <thread>
     
     namespace bmalloc {
     
    -// FIXME: This duplicates code from WTF::WordLock.
    -// https://bugs.webkit.org/show_bug.cgi?id=177719
    +void StaticMutex::lockSlowCase()
    +{
    +    // The longest critical section in bmalloc is much shorter than the
    +    // time it takes to make a system call to yield to the OS scheduler.
    +    // So, we try again a lot before we yield.
    +    static const size_t aLot = 256;
    +
    +    if (!m_isSpinning.test_and_set()) {
    +        auto clear = makeScopeExit([&] { m_isSpinning.clear(); });
     
    -namespace {
    -
    -// This data structure serves three purposes:
    -//
    -// 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and
    -//    condition variable.
    -//
    -// 2) A queue node for when a thread is on some lock's queue.
    -//
    -// 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as
    -//    the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the
    -//    queue takes on the queue head duties.
    -struct ThreadData {
    -    // The parking mechanism.
    -    bool shouldPark { false };
    -    std::mutex parkingLock;
    -    std::condition_variable parkingCondition;
    -
    -    // The queue node.
    -    ThreadData* nextInQueue { nullptr };
    -
    -    // The queue itself.
    -    ThreadData* queueTail { nullptr };
    -};
    -
    -ThreadData* myThreadData()
    -{
    -    return PerThread<ThreadData>::get();
    -}
    -
    -} // anonymous namespace
    -
    -// NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst
    -// fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no
    -// reward.
    -
    -BNO_INLINE void StaticMutex::lockSlow()
    -{
    -    unsigned spinCount = 0;
    -
    -    // This magic number turns out to be optimal based on past JikesRVM experiments.
    -    const unsigned spinLimit = 40;
    -
    -    for (;;) {
    -        uintptr_t currentWordValue = m_word.load();
    -
    -        if (!(currentWordValue & isLockedBit)) {
    -            // It's not possible for someone to hold the queue lock while the lock itself is no longer
    -            // held, since we will only attempt to acquire the queue lock when the lock is held and
    -            // the queue lock prevents unlock.
    -            BASSERT(!(currentWordValue & isQueueLockedBit));
    -            if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit)) {
    -                // Success! We acquired the lock.
    +        for (size_t i = 0; i < aLot; ++i) {
    +            if (try_lock())
                     return;
    -            }
             }
    -
    -        // If there is no queue and we haven't spun too much, we can just try to spin around again.
    -        if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) {
    -            spinCount++;
    -            sched_yield();
    -            continue;
    -        }
    -
    -        // Need to put ourselves on the queue. Create the queue if one does not exist. This requires
    -        // owning the queue for a little bit. The lock that controls the queue is itself a spinlock.
    -        // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this
    -        // thread.
    -        ThreadData* me = myThreadData();
    -        BASSERT(!me->shouldPark);
    -        BASSERT(!me->nextInQueue);
    -        BASSERT(!me->queueTail);
    -
    -        // Reload the current word value, since some time may have passed.
    -        currentWordValue = m_word.load();
    -
    -        // We proceed only if the queue lock is not held, the lock is held, and we succeed in
    -        // acquiring the queue lock.
    -        if ((currentWordValue & isQueueLockedBit)
    -            || !(currentWordValue & isLockedBit)
    -            || !compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) {
    -            sched_yield();
    -            continue;
    -        }
    -
    -        me->shouldPark = true;
    -
    -        // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible
    -        // to release the lock while we hold the queue lock.
    -        ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);
    -        if (queueHead) {
    -            // Put this thread at the end of the queue.
    -            queueHead->queueTail->nextInQueue = me;
    -            queueHead->queueTail = me;
    -
    -            // Release the queue lock.
    -            currentWordValue = m_word.load();
    -            BASSERT(currentWordValue & ~queueHeadMask);
    -            BASSERT(currentWordValue & isQueueLockedBit);
    -            BASSERT(currentWordValue & isLockedBit);
    -            m_word.store(currentWordValue & ~isQueueLockedBit);
    -        } else {
    -            // Make this thread be the queue-head.
    -            queueHead = me;
    -            me->queueTail = me;
    -
    -            // Release the queue lock and install ourselves as the head. No need for a CAS loop, since
    -            // we own the queue lock.
    -            currentWordValue = m_word.load();
    -            BASSERT(!(currentWordValue & ~queueHeadMask));
    -            BASSERT(currentWordValue & isQueueLockedBit);
    -            BASSERT(currentWordValue & isLockedBit);
    -            uintptr_t newWordValue = currentWordValue;
    -            newWordValue |= reinterpret_cast<uintptr_t>(queueHead);
    -            newWordValue &= ~isQueueLockedBit;
    -            m_word.store(newWordValue);
    -        }
    -
    -        // At this point everyone who acquires the queue lock will see me on the queue, and anyone who
    -        // acquires me's lock will see that me wants to park. Note that shouldPark may have been
    -        // cleared as soon as the queue lock was released above, but it will happen while the
    -        // releasing thread holds me's parkingLock.
    -
    -        {
    -            std::unique_lock<std::mutex> locker(me->parkingLock);
    -            while (me->shouldPark)
    -                me->parkingCondition.wait(locker);
    -        }
    -
    -        BASSERT(!me->shouldPark);
    -        BASSERT(!me->nextInQueue);
    -        BASSERT(!me->queueTail);
    -
    -        // Now we can loop around and try to acquire the lock again.
    -    }
    -}
    -
    -BNO_INLINE void StaticMutex::unlockSlow()
    -{
    -    // The fast path can fail either because of spurious weak CAS failure, or because someone put a
    -    // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be
    -    // because someone *will* enqueue a thread onto the queue.
    -
    -    // Acquire the queue lock, or release the lock. This loop handles both lock release in case the
    -    // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is
    -    // actually something interesting on the queue.
    -    for (;;) {
    -        uintptr_t currentWordValue = m_word.load();
    -
    -        BASSERT(currentWordValue & isLockedBit);
    -
    -        if (currentWordValue == isLockedBit) {
    -            if (compareExchangeWeak(m_word, isLockedBit, clear)) {
    -                // The fast path's weak CAS had spuriously failed, and now we succeeded. The lock is
    -                // unlocked and we're done!
    -                return;
    -            }
    -            // Loop around and try again.
    -            sched_yield();
    -            continue;
    -        }
    -
    -        if (currentWordValue & isQueueLockedBit) {
    -            sched_yield();
    -            continue;
    -        }
    -
    -        // If it wasn't just a spurious weak CAS failure and if the queue lock is not held, then there
    -        // must be an entry on the queue.
    -        BASSERT(currentWordValue & ~queueHeadMask);
    -
    -        if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit))
    -            break;
         }
     
    -    uintptr_t currentWordValue = m_word.load();
    -
    -    // After we acquire the queue lock, the lock must still be held and the queue must be
    -    // non-empty. The queue must be non-empty since only the lockSlow() method could have held the
    -    // queue lock and if it did then it only releases it after putting something on the queue.
    -    BASSERT(currentWordValue & isLockedBit);
    -    BASSERT(currentWordValue & isQueueLockedBit);
    -    ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);
    -    BASSERT(queueHead);
    -
    -    ThreadData* newQueueHead = queueHead->nextInQueue;
    -    // Either this was the only thread on the queue, in which case we delete the queue, or there
    -    // are still more threads on the queue, in which case we create a new queue head.
    -    if (newQueueHead)
    -        newQueueHead->queueTail = queueHead->queueTail;
    -
    -    // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop,
    -    // since we hold the queue lock and the lock itself so nothing about the lock can change right
    -    // now.
    -    currentWordValue = m_word.load();
    -    BASSERT(currentWordValue & isLockedBit);
    -    BASSERT(currentWordValue & isQueueLockedBit);
    -    BASSERT((currentWordValue & ~queueHeadMask) == reinterpret_cast<uintptr_t>(queueHead));
    -    uintptr_t newWordValue = currentWordValue;
    -    newWordValue &= ~isLockedBit; // Release the lock.
    -    newWordValue &= ~isQueueLockedBit; // Release the queue lock.
    -    newWordValue &= queueHeadMask; // Clear out the old queue head.
    -    newWordValue |= reinterpret_cast<uintptr_t>(newQueueHead); // Install new queue head.
    -    m_word.store(newWordValue);
    -
    -    // Now the lock is available for acquisition. But we just have to wake up the old queue head.
    -    // After that, we're done!
    -
    -    queueHead->nextInQueue = nullptr;
    -    queueHead->queueTail = nullptr;
    -
    -    // We do this carefully because this may run either before or during the parkingLock critical
    -    // section in lockSlow().
    -    {
    -        std::lock_guard<std::mutex> locker(queueHead->parkingLock);
    -        queueHead->shouldPark = false;
    -    }
    -    // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be
    -    // waiting is queueHead.
    -    queueHead->parkingCondition.notify_one();
    -
    -    // The old queue head can now contend for the lock again. We're done!
    +    // Avoid spinning pathologically.
    +    while (!try_lock())
    +        sched_yield();
     }
     
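
The restored lockSlowCase() above lets at most one waiter spin through the fast path "a lot" of times before everyone falls back to yielding. The same strategy as a self-contained sketch (plain std::atomic_flag stand-ins; the real class clears the spinning flag through a makeScopeExit helper instead of manually):

    #include <atomic>
    #include <sched.h>

    std::atomic_flag gFlag = ATOMIC_FLAG_INIT;       // stand-in for m_flag
    std::atomic_flag gIsSpinning = ATOMIC_FLAG_INIT; // stand-in for m_isSpinning

    bool tryLockSketch() { return !gFlag.test_and_set(std::memory_order_acquire); }

    void lockSlowCaseSketch()
    {
        static const size_t aLot = 256;

        // Elect at most one spinner; other contended threads skip to yielding.
        if (!gIsSpinning.test_and_set()) {
            for (size_t i = 0; i < aLot; ++i) {
                if (tryLockSketch()) {
                    gIsSpinning.clear();
                    return; // Acquired while spinning.
                }
            }
            gIsSpinning.clear();
        }

        // Avoid spinning pathologically: yield to the scheduler between tries.
        while (!tryLockSketch())
            sched_yield();
    }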
  • trunk/Source/bmalloc/bmalloc/StaticMutex.h

    r222893 → r222919
     /*
    - * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
    + * Copyright (C) 2014 Apple Inc. All rights reserved.
      *
      * Redistribution and use in source and binary forms, with or without
    …
     #define StaticMutex_h
     
    -#include "Algorithm.h"
     #include "BAssert.h"
    -#include "BExport.h"
     #include <atomic>
     #include <mutex>
    …
     // Use StaticMutex in static storage, where global constructors and exit-time
     // destructors are prohibited, but all memory is zero-initialized automatically.
    -
    -// This uses the code from what WTF used to call WordLock. It's a fully adaptive mutex that uses
    -// sizeof(void*) storage. It has a fast path that is similar to a spinlock, and a slow path that is
    -// similar to std::mutex.
    -
    -// NOTE: In general, this is a great lock to use if you are very low in the stack. WTF continues to
    -// have a copy of this code.
    -
    -// FIXME: Either fold bmalloc into WTF or find a better way to share code between them.
    -// https://bugs.webkit.org/show_bug.cgi?id=177719
     
     namespace bmalloc {
    …
     public:
         void lock();
    -    bool tryLock();
    -    bool try_lock() { return tryLock(); }
    +    bool try_lock();
         void unlock();
    -
    -    bool isHeld() const;
    -    bool isLocked() const { return isHeld(); }
     
     private:
    -    BEXPORT void lockSlow();
    -    BEXPORT void unlockSlow();
    +    void lockSlowCase();
     
    -    static const uintptr_t clear = 0;
    -    static const uintptr_t isLockedBit = 1;
    -    static const uintptr_t isQueueLockedBit = 2;
    -    static const uintptr_t queueHeadMask = 3;
    +    std::atomic_flag m_flag;
    +    std::atomic_flag m_isSpinning;
    +};
     
    -    std::atomic<uintptr_t> m_word;
    -};
    +static inline void sleep(
    +    std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds duration)
    +{
    +    if (duration == std::chrono::milliseconds(0))
    +        return;
    +
    +    lock.unlock();
    +    std::this_thread::sleep_for(duration);
    +    lock.lock();
    +}
    +
    +static inline void waitUntilFalse(
    +    std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds sleepDuration,
    +    bool& condition)
    +{
    +    while (condition) {
    +        condition = false;
    +        sleep(lock, sleepDuration);
    +    }
    +}
     
     inline void StaticMutex::init()
     {
    -    m_word.store(0, std::memory_order_relaxed);
    +    m_flag.clear();
    +    m_isSpinning.clear();
     }
     
    -inline bool StaticMutex::tryLock()
    +inline bool StaticMutex::try_lock()
     {
    -    return m_word.load(std::memory_order_acquire) & isLockedBit;
    +    return !m_flag.test_and_set(std::memory_order_acquire);
     }
     
     inline void StaticMutex::lock()
     {
    -    if (compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire))
    -        return;
    -
    -    lockSlow();
    +    if (!try_lock())
    +        lockSlowCase();
     }
     
     inline void StaticMutex::unlock()
     {
    -    if (compareExchangeWeak(m_word, isLockedBit, clear, std::memory_order_release))
    -        return;
    -
    -    unlockSlow();
    +    m_flag.clear(std::memory_order_release);
     }
     
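
A note on the restored header: naming the members lock(), try_lock(), and unlock() makes StaticMutex satisfy the standard Lockable requirements, which is exactly what lets sleep() and waitUntilFalse() above take a std::unique_lock<StaticMutex>. A hypothetical caller (the flag and function names are invented for illustration):

    #include <chrono>
    #include <mutex>

    static StaticMutex sMutex; // static storage: zero-initialized, as the header requires
    static bool sPendingWork;

    void pollForWorkSketch()
    {
        // std::unique_lock works because StaticMutex is Lockable.
        std::unique_lock<StaticMutex> lock(sMutex);

        // Repeatedly clear the flag and sleep 100ms until it stays false,
        // i.e. nobody set it again while we slept.
        waitUntilFalse(lock, std::chrono::milliseconds(100), sPendingWork);

        // ... proceed under the lock ...
    }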