Changeset 222893 in webkit
- Timestamp:
- Oct 4, 2017 8:05:42 PM (6 years ago)
- Location:
- trunk/Source
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WTF/ChangeLog
r222878 r222893 1 2017-10-04 Filip Pizlo <fpizlo@apple.com> 2 3 bmalloc mutex should be adaptive 4 https://bugs.webkit.org/show_bug.cgi?id=177839 5 6 Reviewed by Michael Saboff. 7 8 Add some comments that I thought of while copy-pasting this code. 9 10 * wtf/LockAlgorithmInlines.h: 11 * wtf/WordLock.cpp: 12 1 13 2017-10-04 JF Bastien <jfbastien@apple.com> 2 14 -
trunk/Source/WTF/wtf/LockAlgorithmInlines.h
r220186 r222893 33 33 // the lock algorithm slow path without recompiling the world. Right now this should be included in two 34 34 // places (Lock.cpp and JSCell.cpp). 35 36 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst 37 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no 38 // reward. 35 39 36 40 namespace WTF { -
trunk/Source/WTF/wtf/WordLock.cpp
r219763 r222893 77 77 } // anonymous namespace 78 78 79 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst 80 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no 81 // reward. 82 79 83 NEVER_INLINE void WordLockBase::lockSlow() 80 84 { -
trunk/Source/bmalloc/ChangeLog
r222731 r222893 1 2017-10-04 Filip Pizlo <fpizlo@apple.com> 2 3 bmalloc mutex should be adaptive 4 https://bugs.webkit.org/show_bug.cgi?id=177839 5 6 Reviewed by Michael Saboff. 7 8 This pulls the WordLock algorithm into bmalloc, mostly by copy-pasting the code. We need to 9 copy paste because sometimes we build WTF without bmalloc, so WTF cannot rely on bmalloc for 10 anything other than malloc. 11 12 * bmalloc/Algorithm.h: 13 (bmalloc::compareExchangeWeak): 14 (bmalloc::compareExchangeStrong): 15 * bmalloc/PerThread.h: 16 * bmalloc/StaticMutex.cpp: 17 (bmalloc::StaticMutex::lockSlow): 18 (bmalloc::StaticMutex::unlockSlow): 19 (bmalloc::StaticMutex::lockSlowCase): Deleted. 20 * bmalloc/StaticMutex.h: 21 (bmalloc::StaticMutex::try_lock): 22 (bmalloc::StaticMutex::isLocked const): 23 (bmalloc::StaticMutex::init): 24 (bmalloc::StaticMutex::tryLock): 25 (bmalloc::StaticMutex::lock): 26 (bmalloc::StaticMutex::unlock): 27 (bmalloc::sleep): Deleted. 28 (bmalloc::waitUntilFalse): Deleted. 29 1 30 2017-10-02 Yusuke Suzuki <utatane.tea@gmail.com> 2 31 -
trunk/Source/bmalloc/bmalloc/Algorithm.h
r220097 r222893 1 1 /* 2 * Copyright (C) 2014 Apple Inc. All rights reserved.2 * Copyright (C) 2014-2017 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 29 29 #include "BAssert.h" 30 30 #include <algorithm> 31 #include <atomic> 31 32 #include <cstdint> 32 33 #include <cstddef> … … 159 160 } 160 161 162 // We need a CAS API that isn't the badly designed one from C++. 163 template<typename T> 164 bool compareExchangeWeak(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst) 165 { 166 // They could have made a sensible CAS API, but they didn't. Sneaky fact: for no good reason, the 167 // C++ API will mutate expected. I think that apologists will say something about how it saves you 168 // reloading the value around the back of your CAS loop, but that's nonsense. The delay along the 169 // back edge of a CAS loop has almost no impact on CAS loop performance. More often than not, we 170 // want to *add* delays to the back edge, not remove them. 171 return word.compare_exchange_weak(expected, desired, order, std::memory_order_relaxed); 172 } 173 174 template<typename T> 175 bool compareExchangeStrong(std::atomic<T>& word, T expected, T desired, std::memory_order order = std::memory_order_seq_cst) 176 { 177 return word.compare_exchange_strong(expected, desired, order, std::memory_order_relaxed); 178 } 179 161 180 } // namespace bmalloc 162 181 -
trunk/Source/bmalloc/bmalloc/PerThread.h
r220352 r222893 61 61 }; 62 62 63 #if HAVE_PTHREAD_MACHDEP_H64 65 63 class Cache; 66 64 template<typename T> struct PerThreadStorage; 65 66 #if HAVE_PTHREAD_MACHDEP_H 67 67 68 68 // For now, we only support PerThread<PerHeapKind<Cache>>. We can expand to other types by … … 83 83 }; 84 84 85 #e lse85 #endif 86 86 87 87 template<typename T> struct PerThreadStorage { … … 112 112 template<typename T> pthread_key_t PerThreadStorage<T>::s_key; 113 113 template<typename T> std::once_flag PerThreadStorage<T>::s_onceFlag; 114 115 #endif116 114 117 115 template<typename T> -
trunk/Source/bmalloc/bmalloc/StaticMutex.cpp
r219763 r222893 1 1 /* 2 * Copyright (C) 2014 Apple Inc. All rights reserved.2 * Copyright (C) 2014-2017 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 24 24 */ 25 25 26 #include "StaticMutex.h" 27 28 #include "PerThread.h" 26 29 #include "ScopeExit.h" 27 #include "StaticMutex.h" 30 #include <condition_variable> 31 #include <mutex> 28 32 #include <thread> 29 33 30 34 namespace bmalloc { 31 35 32 void StaticMutex::lockSlowCase() 36 // FIXME: This duplicates code from WTF::WordLock. 37 // https://bugs.webkit.org/show_bug.cgi?id=177719 38 39 namespace { 40 41 // This data structure serves three purposes: 42 // 43 // 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and 44 // condition variable. 45 // 46 // 2) A queue node for when a thread is on some lock's queue. 47 // 48 // 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as 49 // the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the 50 // queue takes on the queue head duties. 51 struct ThreadData { 52 // The parking mechanism. 53 bool shouldPark { false }; 54 std::mutex parkingLock; 55 std::condition_variable parkingCondition; 56 57 // The queue node. 58 ThreadData* nextInQueue { nullptr }; 59 60 // The queue itself. 61 ThreadData* queueTail { nullptr }; 62 }; 63 64 ThreadData* myThreadData() 33 65 { 34 // The longest critical section in bmalloc is much shorter than the 35 // time it takes to make a system call to yield to the OS scheduler. 36 // So, we try again a lot before we yield. 37 static const size_t aLot = 256; 66 return PerThread<ThreadData>::get(); 67 } 68 69 } // anonymous namespace 70 71 // NOTE: It's a bug to use any memory order other than seq_cst in this code. The cost of seq_cst 72 // fencing is negligible on slow paths, so any use of a more relaxed memory model is all risk and no 73 // reward. 
74 75 BNO_INLINE void StaticMutex::lockSlow() 76 { 77 unsigned spinCount = 0; 78 79 // This magic number turns out to be optimal based on past JikesRVM experiments. 80 const unsigned spinLimit = 40; 38 81 39 if (!m_isSpinning.test_and_set()) { 40 auto clear = makeScopeExit([&] { m_isSpinning.clear(); }); 41 42 for (size_t i = 0; i < aLot; ++i) { 43 if (try_lock()) 82 for (;;) { 83 uintptr_t currentWordValue = m_word.load(); 84 85 if (!(currentWordValue & isLockedBit)) { 86 // It's not possible for someone to hold the queue lock while the lock itself is no longer 87 // held, since we will only attempt to acquire the queue lock when the lock is held and 88 // the queue lock prevents unlock. 89 BASSERT(!(currentWordValue & isQueueLockedBit)); 90 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isLockedBit)) { 91 // Success! We acquired the lock. 44 92 return; 45 } 93 } 94 } 95 96 // If there is no queue and we haven't spun too much, we can just try to spin around again. 97 if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) { 98 spinCount++; 99 sched_yield(); 100 continue; 101 } 102 103 // Need to put ourselves on the queue. Create the queue if one does not exist. This requries 104 // owning the queue for a little bit. The lock that controls the queue is itself a spinlock. 105 // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this 106 // thread. 107 ThreadData* me = myThreadData(); 108 BASSERT(!me->shouldPark); 109 BASSERT(!me->nextInQueue); 110 BASSERT(!me->queueTail); 111 112 // Reload the current word value, since some time may have passed. 113 currentWordValue = m_word.load(); 114 115 // We proceed only if the queue lock is not held, the lock is held, and we succeed in 116 // acquiring the queue lock. 
117 if ((currentWordValue & isQueueLockedBit) 118 || !(currentWordValue & isLockedBit) 119 || !compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) { 120 sched_yield(); 121 continue; 122 } 123 124 me->shouldPark = true; 125 126 // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible 127 // to release the lock while we hold the queue lock. 128 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 129 if (queueHead) { 130 // Put this thread at the end of the queue. 131 queueHead->queueTail->nextInQueue = me; 132 queueHead->queueTail = me; 133 134 // Release the queue lock. 135 currentWordValue = m_word.load(); 136 BASSERT(currentWordValue & ~queueHeadMask); 137 BASSERT(currentWordValue & isQueueLockedBit); 138 BASSERT(currentWordValue & isLockedBit); 139 m_word.store(currentWordValue & ~isQueueLockedBit); 140 } else { 141 // Make this thread be the queue-head. 142 queueHead = me; 143 me->queueTail = me; 144 145 // Release the queue lock and install ourselves as the head. No need for a CAS loop, since 146 // we own the queue lock. 147 currentWordValue = m_word.load(); 148 BASSERT(~(currentWordValue & ~queueHeadMask)); 149 BASSERT(currentWordValue & isQueueLockedBit); 150 BASSERT(currentWordValue & isLockedBit); 151 uintptr_t newWordValue = currentWordValue; 152 newWordValue |= reinterpret_cast<uintptr_t>(queueHead); 153 newWordValue &= ~isQueueLockedBit; 154 m_word.store(newWordValue); 155 } 156 157 // At this point everyone who acquires the queue lock will see me on the queue, and anyone who 158 // acquires me's lock will see that me wants to park. Note that shouldPark may have been 159 // cleared as soon as the queue lock was released above, but it will happen while the 160 // releasing thread holds me's parkingLock. 
161 162 { 163 std::unique_lock<std::mutex> locker(me->parkingLock); 164 while (me->shouldPark) 165 me->parkingCondition.wait(locker); 166 } 167 168 BASSERT(!me->shouldPark); 169 BASSERT(!me->nextInQueue); 170 BASSERT(!me->queueTail); 171 172 // Now we can loop around and try to acquire the lock again. 46 173 } 47 48 // Avoid spinning pathologically.49 while (!try_lock())50 sched_yield();51 174 } 52 175 176 BNO_INLINE void StaticMutex::unlockSlow() 177 { 178 // The fast path can fail either because of spurious weak CAS failure, or because someone put a 179 // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be 180 // because someone *will* enqueue a thread onto the queue. 181 182 // Acquire the queue lock, or release the lock. This loop handles both lock release in case the 183 // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is 184 // actually something interesting on the queue. 185 for (;;) { 186 uintptr_t currentWordValue = m_word.load(); 187 188 BASSERT(currentWordValue & isLockedBit); 189 190 if (currentWordValue == isLockedBit) { 191 if (compareExchangeWeak(m_word, isLockedBit, clear)) { 192 // The fast path's weak CAS had spuriously failed, and now we succeeded. The lock is 193 // unlocked and we're done! 194 return; 195 } 196 // Loop around and try again. 197 sched_yield(); 198 continue; 199 } 200 201 if (currentWordValue & isQueueLockedBit) { 202 sched_yield(); 203 continue; 204 } 205 206 // If it wasn't just a spurious weak CAS failure and if the queue lock is not held, then there 207 // must be an entry on the queue. 208 BASSERT(currentWordValue & ~queueHeadMask); 209 210 if (compareExchangeWeak(m_word, currentWordValue, currentWordValue | isQueueLockedBit)) 211 break; 212 } 213 214 uintptr_t currentWordValue = m_word.load(); 215 216 // After we acquire the queue lock, the lock must still be held and the queue must be 217 // non-empty. 
The queue must be non-empty since only the lockSlow() method could have held the 218 // queue lock and if it did then it only releases it after putting something on the queue. 219 BASSERT(currentWordValue & isLockedBit); 220 BASSERT(currentWordValue & isQueueLockedBit); 221 ThreadData* queueHead = reinterpret_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 222 BASSERT(queueHead); 223 224 ThreadData* newQueueHead = queueHead->nextInQueue; 225 // Either this was the only thread on the queue, in which case we delete the queue, or there 226 // are still more threads on the queue, in which case we create a new queue head. 227 if (newQueueHead) 228 newQueueHead->queueTail = queueHead->queueTail; 229 230 // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop, 231 // since we hold the queue lock and the lock itself so nothing about the lock can change right 232 // now. 233 currentWordValue = m_word.load(); 234 BASSERT(currentWordValue & isLockedBit); 235 BASSERT(currentWordValue & isQueueLockedBit); 236 BASSERT((currentWordValue & ~queueHeadMask) == reinterpret_cast<uintptr_t>(queueHead)); 237 uintptr_t newWordValue = currentWordValue; 238 newWordValue &= ~isLockedBit; // Release the lock. 239 newWordValue &= ~isQueueLockedBit; // Release the queue lock. 240 newWordValue &= queueHeadMask; // Clear out the old queue head. 241 newWordValue |= reinterpret_cast<uintptr_t>(newQueueHead); // Install new queue head. 242 m_word.store(newWordValue); 243 244 // Now the lock is available for acquisition. But we just have to wake up the old queue head. 245 // After that, we're done! 246 247 queueHead->nextInQueue = nullptr; 248 queueHead->queueTail = nullptr; 249 250 // We do this carefully because this may run either before or during the parkingLock critical 251 // section in lockSlow(). 
252 { 253 std::lock_guard<std::mutex> locker(queueHead->parkingLock); 254 queueHead->shouldPark = false; 255 } 256 // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be 257 // waiting is queueHead. 258 queueHead->parkingCondition.notify_one(); 259 260 // The old queue head can now contend for the lock again. We're done! 261 } 262 53 263 } // namespace bmalloc -
trunk/Source/bmalloc/bmalloc/StaticMutex.h
r205210 r222893 1 1 /* 2 * Copyright (C) 2014 Apple Inc. All rights reserved.2 * Copyright (C) 2014-2017 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 #define StaticMutex_h 28 28 29 #include "Algorithm.h" 29 30 #include "BAssert.h" 31 #include "BExport.h" 30 32 #include <atomic> 31 33 #include <mutex> … … 36 38 // Use StaticMutex in static storage, where global constructors and exit-time 37 39 // destructors are prohibited, but all memory is zero-initialized automatically. 40 41 // This uses the code from what WTF used to call WordLock. It's a fully adaptive mutex that uses 42 // sizeof(void*) storage. It has a fast path that is similar to a spinlock, and a slow path that is 43 // similar to std::mutex. 44 45 // NOTE: In general, this is a great lock to use if you are very low in the stack. WTF continues to 46 // have a copy of this code. 47 48 // FIXME: Either fold bmalloc into WTF or find a better way to share code between them. 
49 // https://bugs.webkit.org/show_bug.cgi?id=177719 38 50 39 51 namespace bmalloc { … … 46 58 public: 47 59 void lock(); 48 bool try_lock(); 60 bool tryLock(); 61 bool try_lock() { return tryLock(); } 49 62 void unlock(); 63 64 bool isHeld() const; 65 bool isLocked() const { return isHeld(); } 50 66 51 67 private: 52 void lockSlowCase(); 68 BEXPORT void lockSlow(); 69 BEXPORT void unlockSlow(); 53 70 54 std::atomic_flag m_flag; 55 std::atomic_flag m_isSpinning; 71 static const uintptr_t clear = 0; 72 static const uintptr_t isLockedBit = 1; 73 static const uintptr_t isQueueLockedBit = 2; 74 static const uintptr_t queueHeadMask = 3; 75 76 std::atomic<uintptr_t> m_word; 56 77 }; 57 58 static inline void sleep(59 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds duration)60 {61 if (duration == std::chrono::milliseconds(0))62 return;63 64 lock.unlock();65 std::this_thread::sleep_for(duration);66 lock.lock();67 }68 69 static inline void waitUntilFalse(70 std::unique_lock<StaticMutex>& lock, std::chrono::milliseconds sleepDuration,71 bool& condition)72 {73 while (condition) {74 condition = false;75 sleep(lock, sleepDuration);76 }77 }78 78 79 79 inline void StaticMutex::init() 80 80 { 81 m_flag.clear(); 82 m_isSpinning.clear(); 81 m_word.store(0, std::memory_order_relaxed); 83 82 } 84 83 85 inline bool StaticMutex::try _lock()84 inline bool StaticMutex::tryLock() 86 85 { 87 return !m_flag.test_and_set(std::memory_order_acquire);86 return m_word.load(std::memory_order_acquire) & isLockedBit; 88 87 } 89 88 90 89 inline void StaticMutex::lock() 91 90 { 92 if (!try_lock()) 93 lockSlowCase(); 91 if (compareExchangeWeak(m_word, clear, isLockedBit, std::memory_order_acquire)) 92 return; 93 94 lockSlow(); 94 95 } 95 96 96 97 inline void StaticMutex::unlock() 97 98 { 98 m_flag.clear(std::memory_order_release); 99 if (compareExchangeWeak(m_word, isLockedBit, clear, std::memory_order_release)) 100 return; 101 102 unlockSlow(); 99 103 } 100 104
Note: See TracChangeset for help on using the changeset viewer.