Changeset 222919 in webkit
- Timestamp:
- Oct 5, 2017 10:58:55 AM (7 years ago)
- Location:
- trunk/Source
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WTF/ChangeLog
r222893 r222919 1 2017-10-05 Matt Lewis <jlewis3@apple.com> 2 3 Unreviewed, rolling out r222893. 4 5 This caused multiple API failures. 6 7 Reverted changeset: 8 9 "bmalloc mutex should be adaptive" 10 https://bugs.webkit.org/show_bug.cgi?id=177839 11 http://trac.webkit.org/changeset/222893 12 1 13 2017-10-04 Filip Pizlo <fpizlo@apple.com> 2 14 -
trunk/Source/WTF/wtf/LockAlgorithmInlines.h
r222893 r222919 33 33 // the lock algorithm slow path without recompiling the world. Right now this should be included in two 34 34 // places (Lock.cpp and JSCell.cpp). 35 36 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst37 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no38 // reward.39 35 40 36 namespace WTF { -
trunk/Source/WTF/wtf/WordLock.cpp
r222893 r222919 77 77 } // anonymous namespace 78 78 79 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst80 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no81 // reward.82 83 79 NEVER_INLINE void WordLockBase::lockSlow() 84 80 { -
trunk/Source/bmalloc/ChangeLog
r222900 r222919 1 2017-10-05 Matt Lewis <jlewis3@apple.com> 2 3 Unreviewed, rolling out r222893. 4 5 This caused multiple API failures. 6 7 Reverted changeset: 8 9 "bmalloc mutex should be adaptive" 10 https://bugs.webkit.org/show_bug.cgi?id=177839 11 http://trac.webkit.org/changeset/222893 12 1 13 2017-10-05 Yusuke Suzuki <utatane.tea@gmail.com> 2 14 -
trunk/Source/bmalloc/bmalloc/Algorithm.h
r222893 r222919 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 29 29 #include "BAssert.h" 30 30 #include <algorithm> 31 #include <atomic>32 31 #include <cstdint> 33 32 #include <cstddef> … … 160 159 } 161 160 162 // We need a CAS API that isn't the badly designed one from C++.163 template<typename T>164 bool compareExchangeWeak(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)165 {166 // They could have made a sensible CAS API, but they didn't. Sneaky fact: for no good reason, the167 // C++ API will mutate expected. I think that apologists will say something about how it saves you168 // reloading the value around the back of your CAS loop, but that's nonsense. The delay along the169 // back edge of a CAS loop has almost no impact on CAS loop performance. More often than not, we170 // want to *add* delays to the back edge, not remove them.171 return word.compare_exchange_weak(expected, desired, order, std::memory_order_relaxed);172 }173 174 template<typename T>175 bool compareExchangeStrong(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)176 {177 return word.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed);178 }179 180 161 } // namespace bmalloc 181 162 -
trunk/Source/bmalloc/bmalloc/PerThread.h
r222893 r222919 61 61 }; 62 62 63 #if HAVE_PTHREAD_MACHDEP_H 64 63 65 class Cache; 64 66 template<typename T> struct PerThreadStorage; 65 66 #if HAVE_PTHREAD_MACHDEP_H67 67 68 68 // For now, we only support PerThread<PerHeapKind<Cache>>. We can expand to other types by … … 83 83 }; 84 84 85 #e ndif85 #else 86 86 87 87 template<typename T> struct PerThreadStorage { … … 112 112 template<typename T> pthread_key_t PerThreadStorage<T>::s_key; 113 113 template<typename T> std::once_flag PerThreadStorage<T>::s_onceFlag; 114 115 #endif 114 116 115 117 template<typename T> -
trunk/Source/bmalloc/bmalloc/StaticMutex.cpp
r222893 r222919 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 #include "ScopeExit.h" 26 27 #include "StaticMutex.h" 27 28 #include "PerThread.h"29 #include "ScopeExit.h"30 #include <condition_variable>31 #include <mutex>32 28 #include <thread> 33 29 34 30 namespace bmalloc { 35 31 36 // FIXME: This duplicates code from WTF::WordLock. 37 // https://bugs.webkit.org/show_bug.cgi?id=177719 32 void StaticMutex::lockSlowCase() 33 { 34 // The longest critical section in bmalloc is much shorter than the 35 // time it takes to make a system call to yield to the OS scheduler. 36 // So, we try again a lot before we yield. 37 static const size_t aLot = 256; 38 39 if (!m_isSpinning.test_and_set()) { 40 auto clear = makeScopeExit([&] { m_isSpinning.clear(); }); 38 41 39 namespace { 40 41 // This data structure serves three purposes: 42 // 43 // 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and 44 // condition variable. 45 // 46 // 2) A queue node for when a thread is on some lock's queue. 47 // 48 // 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as 49 // the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the 50 // queue takes on the queue head duties. 51 struct ThreadData { 52 // The parking mechanism. 53 bool shouldPark { false }; 54 std::mutex parkingLock; 55 std::condition_variable parkingCondition; 56 57 // The queue node. 58 ThreadData* nextInQueue { nullptr }; 59 60 // The queue itself. 61 ThreadData* queueTail { nullptr }; 62 }; 63 64 ThreadData* myThreadData() 65 { 66 return PerThread<ThreadData>::get(); 67 } 68 69 } // anonymous namespace 70 71 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst 72 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no 73 // reward. 74 75 BNO_INLINE void StaticMutex::lockSlow() 76 { 77 unsigned spinCount = 0; 78 79 // This magic number turns out to be optimal based on past JikesRVM experiments. 80 const unsigned spinLimit = 40; 81 82 for (;;) { 83 uintptr_t currentWordValue = m_word.load(); 84 85 if (!(currentWordValue & isLockedBit)) { 86 // It's not possible for someone to hold the queue lock while the lock itself is no longer 87 // held, since we will only attempt to acquire the queue lock when the lock is held and 88 // the queue lock prevents unlock. 89 BASSERT(!(currentWordValue & isQueueLockedBit)); 90 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit)) { 91 // Success! We acquired the lock. 42 for (size_t i = 0; i < aLot; ++i) { 43 if (try_lock()) 92 44 return; 93 }94 45 } 95 96 // If there is no queue and we haven't spun too much, we can just try to spin around again.97 if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) {98 spinCount++;99 sched_yield();100 continue;101 }102 103 // Need to put ourselves on the queue. Create the queue if one does not exist. This requries104 // owning the queue for a little bit. The lock that controls the queue is itself a spinlock.105 // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this106 // thread.107 ThreadData* me = myThreadData();108 BASSERT(!me->shouldPark);109 BASSERT(!me->nextInQueue);110 BASSERT(!me->queueTail);111 112 // Reload the current word value, since some time may have passed.113 currentWordValue = m_word.load();114 115 // We proceed only if the queue lock is not held, the lock is held, and we succeed in116 // acquiring the queue lock.117 if ((currentWordValue & isQueueLockedBit)118 || !(currentWordValue & isLockedBit)119 || !compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) {120 sched_yield();121 continue;122 }123 124 me->shouldPark = true;125 126 // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible127 // to release the lock while we hold the queue lock.128 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);129 if (queueHead) {130 // Put this thread at the end of the queue.131 queueHead->queueTail->nextInQueue = me;132 queueHead->queueTail = me;133 134 // Release the queue lock.135 currentWordValue = m_word.load();136 BASSERT(currentWordValue & ~queueHeadMask);137 BASSERT(currentWordValue & isQueueLockedBit);138 BASSERT(currentWordValue & isLockedBit);139 m_word.store(currentWordValue & ~isQueueLockedBit);140 } else {141 // Make this thread be the queue-head.142 queueHead = me;143 me->queueTail = me;144 145 // Release the queue lock and install ourselves as the head. No need for a CAS loop, since146 // we own the queue lock.147 currentWordValue = m_word.load();148 BASSERT(~(currentWordValue & ~queueHeadMask));149 BASSERT(currentWordValue & isQueueLockedBit);150 BASSERT(currentWordValue & isLockedBit);151 uintptr_t newWordValue = currentWordValue;152 newWordValue |= reinterpret_cast<uintptr_t>(queueHead);153 newWordValue &= ~isQueueLockedBit;154 m_word.store(newWordValue);155 }156 157 // At this point everyone who acquires the queue lock will see me on the queue, and anyone who158 // acquires me's lock will see that me wants to park. Note that shouldPark may have been159 // cleared as soon as the queue lock was released above, but it will happen while the160 // releasing thread holds me's parkingLock.161 162 {163 std::unique_lock<std::mutex> locker(me->parkingLock);164 while (me->shouldPark)165 me->parkingCondition.wait(locker);166 }167 168 BASSERT(!me->shouldPark);169 BASSERT(!me->nextInQueue);170 BASSERT(!me->queueTail);171 172 // Now we can loop around and try to acquire the lock again.173 }174 }175 176 BNO_INLINE void StaticMutex::unlockSlow()177 {178 // The fast path can fail either because of spurious weak CAS failure, or because someone put a179 // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be180 // because someone *will* enqueue a thread onto the queue.181 182 // Acquire the queue lock, or release the lock. This loop handles both lock release in case the183 // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is184 // actually something interesting on the queue.185 for (;;) {186 uintptr_t currentWordValue = m_word.load();187 188 BASSERT(currentWordValue & isLockedBit);189 190 if (currentWordValue == isLockedBit) {191 if (compareExchangeWeak(m_word, isLockedBit, clear)) {192 // The fast path's weak CAS had spuriously failed, and now we succeeded. The lock is193 // unlocked and we're done!194 return;195 }196 // Loop around and try again.197 sched_yield();198 continue;199 }200 201 if (currentWordValue & isQueueLockedBit) {202 sched_yield();203 continue;204 }205 206 // If it wasn't just a spurious weak CAS failure and if the queue lock is not held, then there207 // must be an entry on the queue.208 BASSERT(currentWordValue & ~queueHeadMask);209 210 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit))211 break;212 46 } 213 47 214 uintptr_t currentWordValue = m_word.load(); 215 216 // After we acquire the queue lock, the lock must still be held and the queue must be 217 // non-empty. The queue must be non-empty since only the lockSlow() method could have held the 218 // queue lock and if it did then it only releases it after putting something on the queue. 219 BASSERT(currentWordValue & isLockedBit); 220 BASSERT(currentWordValue & isQueueLockedBit); 221 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 222 BASSERT(queueHead); 223 224 ThreadData* newQueueHead = queueHead->nextInQueue; 225 // Either this was the only thread on the queue, in which case we delete the queue, or there 226 // are still more threads on the queue, in which case we create a new queue head. 227 if (newQueueHead) 228 newQueueHead->queueTail = queueHead->queueTail; 229 230 // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop, 231 // since we hold the queue lock and the lock itself so nothing about the lock can change right 232 // now. 233 currentWordValue = m_word.load(); 234 BASSERT(currentWordValue & isLockedBit); 235 BASSERT(currentWordValue & isQueueLockedBit); 236 BASSERT((currentWordValue & ~queueHeadMask) == reinterpret_cast<uintptr_t>(queueHead)); 237 uintptr_t newWordValue = currentWordValue; 238 newWordValue &= ~isLockedBit; // Release the lock. 239 newWordValue &= ~isQueueLockedBit; // Release the queue lock. 240 newWordValue &= queueHeadMask; // Clear out the old queue head. 241 newWordValue |= reinterpret_cast<uintptr_t>(newQueueHead); // Install new queue head. 242 m_word.store(newWordValue); 243 244 // Now the lock is available for acquisition. But we just have to wake up the old queue head. 245 // After that, we're done! 246 247 queueHead->nextInQueue = nullptr; 248 queueHead->queueTail = nullptr; 249 250 // We do this carefully because this may run either before or during the parkingLock critical 251 // section in lockSlow(). 252 { 253 std::lock_guard<std::mutex> locker(queueHead->parkingLock); 254 queueHead->shouldPark = false; 255 } 256 // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be 257 // waiting is queueHead. 258 queueHead->parkingCondition.notify_one(); 259 260 // The old queue head can now contend for the lock again. We're done! 48 // Avoid spinning pathologically. 49 while (!try_lock()) 50 sched_yield(); 261 51 } 262 52 -
trunk/Source/bmalloc/bmalloc/StaticMutex.h
r222893 r222919 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 #define StaticMutex_h 28 28 29 #include "Algorithm.h"30 29 #include "BAssert.h" 31 #include "BExport.h"32 30 #include <atomic> 33 31 #include <mutex> … … 38 36 // Use StaticMutex in static storage, where global constructors and exit-time 39 37 // destructors are prohibited, but all memory is zero-initialized automatically. 40 41 // This uses the code from what WTF used to call WordLock. It's a fully adaptive mutex that uses42 // sizeof(void*) storage. It has a fast path that is similar to a spinlock, and a slow path that is43 // similar to std::mutex.44 45 // NOTE: In general, this is a great lock to use if you are very low in the stack. WTF continues to46 // have a copy of this code.47 48 // FIXME: Either fold bmalloc into WTF or find a better way to share code between them.49 // https://bugs.webkit.org/show_bug.cgi?id=17771950 38 51 39 namespace bmalloc { … … 58 46 public: 59 47 void lock(); 60 bool tryLock(); 61 bool try_lock() { return tryLock(); } 48 bool try_lock(); 62 49 void unlock(); 63 64 bool isHeld() const;65 bool isLocked() const { return isHeld(); }66 50 67 51 private: 68 BEXPORT void lockSlow(); 69 BEXPORT void unlockSlow(); 52 void lockSlowCase(); 70 53 71 static const uintptr_t clear = 0; 72 static const uintptr_t isLockedBit = 1; 73 static const uintptr_t isQueueLockedBit = 2; 74 static const uintptr_t queueHeadMask = 3; 54 std::atomic_flag m_flag; 55 std::atomic_flag m_isSpinning; 56 }; 75 57 76 std::atomic<uintptr_t> m_word; 77 }; 58 static inline void sleep( 59 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds duration) 60 { 61 if (duration == std::chrono::milliseconds(0)) 62 return; 63 64 lock.unlock(); 65 std::this_thread::sleep_for(duration); 66 lock.lock(); 67 } 68 69 static inline void waitUntilFalse( 70 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds sleepDuration, 71 bool& condition) 72 { 73 while (condition) { 74 condition = false; 75 sleep(lock, sleepDuration); 76 } 77 } 78 78 79 79 inline void StaticMutex::init() 80 80 { 81 m_word.store(0, std::memory_order_relaxed); 81 m_flag.clear(); 82 m_isSpinning.clear(); 82 83 } 83 84 84 inline bool StaticMutex::try Lock()85 inline bool StaticMutex::try_lock() 85 86 { 86 return m_word.load(std::memory_order_acquire) & isLockedBit;87 return !m_flag.test_and_set(std::memory_order_acquire); 87 88 } 88 89 89 90 inline void StaticMutex::lock() 90 91 { 91 if (compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire)) 92 return; 93 94 lockSlow(); 92 if (!try_lock()) 93 lockSlowCase(); 95 94 } 96 95 97 96 inline void StaticMutex::unlock() 98 97 { 99 if (compareExchangeWeak(m_word, isLockedBit, clear, std::memory_order_release)) 100 return; 101 102 unlockSlow(); 98 m_flag.clear(std::memory_order_release); 103 99 } 104 100
Note: See TracChangeset
for help on using the changeset viewer.