Changeset 229504 in webkit
- Timestamp:
- Mar 10, 2018 9:28:09 AM (6 years ago)
- Location:
- trunk/Source
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WTF/ChangeLog
r229481 r229504 1 2018-03-10 Commit Queue <commit-queue@webkit.org> 2 3 Unreviewed, rolling out r229436. 4 https://bugs.webkit.org/show_bug.cgi?id=183542 5 6 seems to have regressed wasm compile times by 10% (Requested 7 by pizlo-mbp on #webkit). 8 9 Reverted changeset: 10 11 "bmalloc mutex should be adaptive" 12 https://bugs.webkit.org/show_bug.cgi?id=177839 13 https://trac.webkit.org/changeset/229436 14 1 15 2018-03-09 Mark Lam <mark.lam@apple.com> 2 16 -
trunk/Source/WTF/wtf/LockAlgorithmInlines.h
r229436 r229504 34 34 // the lock algorithm slow path without recompiling the world. Right now this should be included in two 35 35 // places (Lock.cpp and JSCell.cpp). 36 37 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst38 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no39 // reward.40 36 41 37 namespace WTF { -
trunk/Source/WTF/wtf/WordLock.cpp
r229436 r229504 77 77 } // anonymous namespace 78 78 79 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst80 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no81 // reward.82 83 79 NEVER_INLINE void WordLock::lockSlow() 84 80 { … … 228 224 ASSERT(currentWordValue & isQueueLockedBit); 229 225 ThreadData* queueHead = bitwise_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 230 RELEASE_ASSERT(queueHead); 231 RELEASE_ASSERT(queueHead->shouldPark); // This would have been set before the thread was enqueued, so it must still be set now. 226 ASSERT(queueHead); 232 227 233 228 ThreadData* newQueueHead = queueHead->nextInQueue; … … 262 257 std::lock_guard<std::mutex> locker(queueHead->parkingLock); 263 258 queueHead->shouldPark = false; 264 // We have to do the notify while still holding the lock, since otherwise, we could try to265 // do it after the queueHead has destructed. It's impossible for queueHead to destruct266 // while we hold the lock, since it is either currently in the wait loop or it's before it267 // so it has to grab the lock before destructing.268 queueHead->parkingCondition.notify_one();269 259 } 260 // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be 261 // waiting is queueHead. 262 queueHead->parkingCondition.notify_one(); 270 263 271 264 // The old queue head can now contend for the lock again. We're done! -
trunk/Source/bmalloc/ChangeLog
r229436 r229504 1 2018-03-10 Commit Queue <commit-queue@webkit.org> 2 3 Unreviewed, rolling out r229436. 4 https://bugs.webkit.org/show_bug.cgi?id=183542 5 6 seems to have regressed wasm compile times by 10% (Requested 7 by pizlo-mbp on #webkit). 8 9 Reverted changeset: 10 11 "bmalloc mutex should be adaptive" 12 https://bugs.webkit.org/show_bug.cgi?id=177839 13 https://trac.webkit.org/changeset/229436 14 1 15 2018-03-08 Filip Pizlo <fpizlo@apple.com> 2 16 -
trunk/Source/bmalloc/bmalloc/Algorithm.h
r229436 r229504 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 29 29 #include "BAssert.h" 30 30 #include <algorithm> 31 #include <atomic>32 31 #include <cstdint> 33 32 #include <cstddef> … … 163 162 } 164 163 165 // We need a CAS API that isn't the badly designed one from C++.166 template<typename T>167 bool compareExchangeWeak(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)168 {169 // They could have made a sensible CAS API, but they didn't. Sneaky fact: for no good reason, the170 // C++ API will mutate expected. I think that apologists will say something about how it saves you171 // reloading the value around the back of your CAS loop, but that's nonsense. The delay along the172 // back edge of a CAS loop has almost no impact on CAS loop performance. More often than not, we173 // want to *add* delays to the back edge, not remove them.174 return word.compare_exchange_weak(expected, desired, order, std::memory_order_relaxed);175 }176 177 template<typename T>178 bool compareExchangeStrong(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst)179 {180 return word.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed);181 }182 183 164 #define BOFFSETOF(class, field) (reinterpret_cast<ptrdiff_t>(&(reinterpret_cast<class*>(0x4000)->field)) - 0x4000) 184 165 -
trunk/Source/bmalloc/bmalloc/PerThread.h
r229436 r229504 61 61 }; 62 62 63 #if HAVE_PTHREAD_MACHDEP_H 64 63 65 class Cache; 64 66 template<typename T> struct PerThreadStorage; 65 66 #if HAVE_PTHREAD_MACHDEP_H67 67 68 68 // For now, we only support PerThread<PerHeapKind<Cache>>. We can expand to other types by … … 83 83 }; 84 84 85 #e ndif85 #else 86 86 87 87 template<typename T> struct PerThreadStorage { … … 112 112 template<typename T> pthread_key_t PerThreadStorage<T>::s_key; 113 113 template<typename T> std::once_flag PerThreadStorage<T>::s_onceFlag; 114 115 #endif 114 116 115 117 template<typename T> -
trunk/Source/bmalloc/bmalloc/StaticMutex.cpp
r229436 r229504 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 #include "ScopeExit.h" 26 27 #include "StaticMutex.h" 27 28 #include "PerThread.h"29 #include "ScopeExit.h"30 #include <condition_variable>31 #include <mutex>32 28 #include <thread> 33 29 34 30 namespace bmalloc { 35 31 36 // FIXME: This duplicates code from WTF::WordLock. 37 // https://bugs.webkit.org/show_bug.cgi?id=177719 32 void StaticMutex::lockSlowCase() 33 { 34 // The longest critical section in bmalloc is much shorter than the 35 // time it takes to make a system call to yield to the OS scheduler. 36 // So, we try again a lot before we yield. 37 static const size_t aLot = 256; 38 39 if (!m_isSpinning.test_and_set()) { 40 auto clear = makeScopeExit([&] { m_isSpinning.clear(); }); 38 41 39 namespace { 40 41 // This data structure serves three purposes: 42 // 43 // 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and 44 // condition variable. 45 // 46 // 2) A queue node for when a thread is on some lock's queue. 47 // 48 // 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as 49 // the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the 50 // queue takes on the queue head duties. 51 struct ThreadData { 52 // The parking mechanism. 53 bool shouldPark { false }; 54 std::mutex parkingLock; 55 std::condition_variable parkingCondition; 56 57 // The queue node. 58 ThreadData* nextInQueue { nullptr }; 59 60 // The queue itself. 61 ThreadData* queueTail { nullptr }; 62 }; 63 64 ThreadData* myThreadData() 65 { 66 return PerThread<ThreadData>::get(); 67 } 68 69 } // anonymous namespace 70 71 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst 72 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no 73 // reward. 74 75 BNO_INLINE void StaticMutex::lockSlow() 76 { 77 unsigned spinCount = 0; 78 79 // This magic number turns out to be optimal based on past JikesRVM experiments. 80 const unsigned spinLimit = 40; 81 82 for (;;) { 83 uintptr_t currentWordValue = m_word.load(); 84 85 if (!(currentWordValue & isLockedBit)) { 86 // It's not possible for someone to hold the queue lock while the lock itself is no longer 87 // held, since we will only attempt to acquire the queue lock when the lock is held and 88 // the queue lock prevents unlock. 89 BASSERT(!(currentWordValue & isQueueLockedBit)); 90 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit)) { 91 // Success! We acquired the lock. 42 for (size_t i = 0; i < aLot; ++i) { 43 if (try_lock()) 92 44 return; 93 }94 45 } 95 96 // If there is no queue and we haven't spun too much, we can just try to spin around again.97 if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) {98 spinCount++;99 sched_yield();100 continue;101 }102 103 // Need to put ourselves on the queue. Create the queue if one does not exist. This requries104 // owning the queue for a little bit. The lock that controls the queue is itself a spinlock.105 // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this106 // thread.107 ThreadData* me = myThreadData();108 BASSERT(!me->shouldPark);109 BASSERT(!me->nextInQueue);110 BASSERT(!me->queueTail);111 112 // Reload the current word value, since some time may have passed.113 currentWordValue = m_word.load();114 115 // We proceed only if the queue lock is not held, the lock is held, and we succeed in116 // acquiring the queue lock.117 if ((currentWordValue & isQueueLockedBit)118 || !(currentWordValue & isLockedBit)119 || !compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) {120 sched_yield();121 continue;122 }123 124 me->shouldPark = true;125 126 // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible127 // to release the lock while we hold the queue lock.128 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask);129 if (queueHead) {130 // Put this thread at the end of the queue.131 queueHead->queueTail->nextInQueue = me;132 queueHead->queueTail = me;133 134 // Release the queue lock.135 currentWordValue = m_word.load();136 BASSERT(currentWordValue & ~queueHeadMask);137 BASSERT(currentWordValue & isQueueLockedBit);138 BASSERT(currentWordValue & isLockedBit);139 m_word.store(currentWordValue & ~isQueueLockedBit);140 } else {141 // Make this thread be the queue-head.142 queueHead = me;143 me->queueTail = me;144 145 // Release the queue lock and install ourselves as the head. No need for a CAS loop, since146 // we own the queue lock.147 currentWordValue = m_word.load();148 BASSERT(~(currentWordValue & ~queueHeadMask));149 BASSERT(currentWordValue & isQueueLockedBit);150 BASSERT(currentWordValue & isLockedBit);151 uintptr_t newWordValue = currentWordValue;152 newWordValue |= reinterpret_cast<uintptr_t>(queueHead);153 newWordValue &= ~isQueueLockedBit;154 m_word.store(newWordValue);155 }156 157 // At this point everyone who acquires the queue lock will see me on the queue, and anyone who158 // acquires me's lock will see that me wants to park. Note that shouldPark may have been159 // cleared as soon as the queue lock was released above, but it will happen while the160 // releasing thread holds me's parkingLock.161 162 {163 std::unique_lock<std::mutex> locker(me->parkingLock);164 while (me->shouldPark)165 me->parkingCondition.wait(locker);166 }167 168 BASSERT(!me->shouldPark);169 BASSERT(!me->nextInQueue);170 BASSERT(!me->queueTail);171 172 // Now we can loop around and try to acquire the lock again.173 }174 }175 176 BNO_INLINE void StaticMutex::unlockSlow()177 {178 // The fast path can fail either because of spurious weak CAS failure, or because someone put a179 // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be180 // because someone *will* enqueue a thread onto the queue.181 182 // Acquire the queue lock, or release the lock. This loop handles both lock release in case the183 // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is184 // actually something interesting on the queue.185 for (;;) {186 uintptr_t currentWordValue = m_word.load();187 188 BASSERT(currentWordValue & isLockedBit);189 190 if (currentWordValue == isLockedBit) {191 if (compareExchangeWeak(m_word, isLockedBit, clear)) {192 // The fast path's weak CAS had spuriously failed, and now we succeeded. The lock is193 // unlocked and we're done!194 return;195 }196 // Loop around and try again.197 sched_yield();198 continue;199 }200 201 if (currentWordValue & isQueueLockedBit) {202 sched_yield();203 continue;204 }205 206 // If it wasn't just a spurious weak CAS failure and if the queue lock is not held, then there207 // must be an entry on the queue.208 BASSERT(currentWordValue & ~queueHeadMask);209 210 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit))211 break;212 46 } 213 47 214 uintptr_t currentWordValue = m_word.load(); 215 216 // After we acquire the queue lock, the lock must still be held and the queue must be 217 // non-empty. The queue must be non-empty since only the lockSlow() method could have held the 218 // queue lock and if it did then it only releases it after putting something on the queue. 219 BASSERT(currentWordValue & isLockedBit); 220 BASSERT(currentWordValue & isQueueLockedBit); 221 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 222 RELEASE_BASSERT(queueHead); 223 RELEASE_BASSERT(queueHead->shouldPark); 224 225 ThreadData* newQueueHead = queueHead->nextInQueue; 226 // Either this was the only thread on the queue, in which case we delete the queue, or there 227 // are still more threads on the queue, in which case we create a new queue head. 228 if (newQueueHead) 229 newQueueHead->queueTail = queueHead->queueTail; 230 231 // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop, 232 // since we hold the queue lock and the lock itself so nothing about the lock can change right 233 // now. 234 currentWordValue = m_word.load(); 235 BASSERT(currentWordValue & isLockedBit); 236 BASSERT(currentWordValue & isQueueLockedBit); 237 BASSERT((currentWordValue & ~queueHeadMask) == reinterpret_cast<uintptr_t>(queueHead)); 238 uintptr_t newWordValue = currentWordValue; 239 newWordValue &= ~isLockedBit; // Release the lock. 240 newWordValue &= ~isQueueLockedBit; // Release the queue lock. 241 newWordValue &= queueHeadMask; // Clear out the old queue head. 242 newWordValue |= reinterpret_cast<uintptr_t>(newQueueHead); // Install new queue head. 243 m_word.store(newWordValue); 244 245 // Now the lock is available for acquisition. But we just have to wake up the old queue head. 246 // After that, we're done! 247 248 queueHead->nextInQueue = nullptr; 249 queueHead->queueTail = nullptr; 250 251 // We do this carefully because this may run either before or during the parkingLock critical 252 // section in lockSlow(). 253 { 254 std::lock_guard<std::mutex> locker(queueHead->parkingLock); 255 RELEASE_BASSERT(queueHead->shouldPark); 256 queueHead->shouldPark = false; 257 // We have to do the notify while still holding the lock, since otherwise, we could try to 258 // do it after the queueHead has destructed. It's impossible for queueHead to destruct 259 // while we hold the lock, since it is either currently in the wait loop or it's before it 260 // so it has to grab the lock before destructing. 261 queueHead->parkingCondition.notify_one(); 262 } 263 264 // The old queue head can now contend for the lock again. We're done! 48 // Avoid spinning pathologically. 49 while (!try_lock()) 50 sched_yield(); 265 51 } 266 52 -
trunk/Source/bmalloc/bmalloc/StaticMutex.h
r229436 r229504 1 1 /* 2 * Copyright (C) 2014 -2017Apple Inc. All rights reserved.2 * Copyright (C) 2014 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 #define StaticMutex_h 28 28 29 #include "Algorithm.h"30 29 #include "BAssert.h" 31 #include "BExport.h"32 30 #include <atomic> 33 31 #include <mutex> … … 38 36 // Use StaticMutex in static storage, where global constructors and exit-time 39 37 // destructors are prohibited, but all memory is zero-initialized automatically. 40 41 // This uses the code from what WTF used to call WordLock. It's a fully adaptive mutex that uses42 // sizeof(void*) storage. It has a fast path that is similar to a spinlock, and a slow path that is43 // similar to std::mutex.44 45 // NOTE: In general, this is a great lock to use if you are very low in the stack. WTF continues to46 // have a copy of this code.47 48 // FIXME: Either fold bmalloc into WTF or find a better way to share code between them.49 // https://bugs.webkit.org/show_bug.cgi?id=17771950 38 51 39 namespace bmalloc { … … 58 46 public: 59 47 void lock(); 60 bool tryLock(); 61 bool try_lock() { return tryLock(); } 48 bool try_lock(); 62 49 void unlock(); 63 64 bool isHeld() const;65 bool isLocked() const { return isHeld(); }66 50 67 51 private: 68 BEXPORT void lockSlow(); 69 BEXPORT void unlockSlow(); 52 BEXPORT void lockSlowCase(); 70 53 71 static const uintptr_t clear = 0; 72 static const uintptr_t isLockedBit = 1; 73 static const uintptr_t isQueueLockedBit = 2; 74 static const uintptr_t queueHeadMask = 3; 54 std::atomic_flag m_flag; 55 std::atomic_flag m_isSpinning; 56 }; 75 57 76 std::atomic<uintptr_t> m_word; 77 }; 58 static inline void sleep( 59 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds duration) 60 { 61 if (duration == std::chrono::milliseconds(0)) 62 return; 63 64 lock.unlock(); 65 std::this_thread::sleep_for(duration); 66 lock.lock(); 67 } 68 69 static inline void waitUntilFalse( 70 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds sleepDuration, 71 bool& condition) 72 { 73 while (condition) { 74 condition = false; 75 sleep(lock, sleepDuration); 76 } 77 } 78 78 79 79 inline void StaticMutex::init() 80 80 { 81 m_word.store(0, std::memory_order_relaxed); 81 m_flag.clear(); 82 m_isSpinning.clear(); 82 83 } 83 84 84 inline bool StaticMutex::try Lock()85 inline bool StaticMutex::try_lock() 85 86 { 86 for (;;) { 87 uintptr_t currentWordValue = m_word.load(std::memory_order_relaxed); 88 89 if (currentWordValue & isLockedBit) 90 return false; 91 92 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit, std::memory_order_acquire)) 93 return true; 94 } 87 return !m_flag.test_and_set(std::memory_order_acquire); 95 88 } 96 89 97 90 inline void StaticMutex::lock() 98 91 { 99 if (compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire)) 100 return; 101 102 lockSlow(); 92 if (!try_lock()) 93 lockSlowCase(); 103 94 } 104 95 105 96 inline void StaticMutex::unlock() 106 97 { 107 if (compareExchangeWeak(m_word, isLockedBit, clear, std::memory_order_release)) 108 return; 109 110 unlockSlow(); 98 m_flag.clear(std::memory_order_release); 111 99 } 112 100
Note: See TracChangeset
for help on using the changeset viewer.