Changeset 188323 in webkit
- Timestamp:
- Aug 11, 2015 9:20:24 PM (9 years ago)
- Location:
- trunk
- Files:
-
- 2 added
- 2 deleted
- 12 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r188311 r188323 1 2015-08-11 Filip Pizlo <fpizlo@apple.com> 2 3 Always use a byte-sized lock implementation 4 https://bugs.webkit.org/show_bug.cgi?id=147908 5 6 Reviewed by Geoffrey Garen. 7 8 * runtime/ConcurrentJITLock.h: Lock is now byte-sized and ByteLock is gone, so use Lock. 9 1 10 2015-08-11 Alexey Proskuryakov <ap@apple.com> 2 11 -
trunk/Source/JavaScriptCore/runtime/ConcurrentJITLock.h
r188280 r188323 28 28 29 29 #include "DeferGC.h" 30 #include <wtf/ ByteLock.h>30 #include <wtf/Lock.h> 31 31 #include <wtf/NoLock.h> 32 32 … … 34 34 35 35 #if ENABLE(CONCURRENT_JIT) 36 typedef ByteLock ConcurrentJITLock;37 typedef ByteLocker ConcurrentJITLockerImpl;36 typedef Lock ConcurrentJITLock; 37 typedef LockHolder ConcurrentJITLockerImpl; 38 38 #else 39 39 typedef NoLock ConcurrentJITLock; -
trunk/Source/WTF/ChangeLog
r188301 r188323 1 2015-08-11 Filip Pizlo <fpizlo@apple.com> 2 3 Always use a byte-sized lock implementation 4 https://bugs.webkit.org/show_bug.cgi?id=147908 5 6 Reviewed by Geoffrey Garen. 7 8 At the start of my locking algorithm crusade, I implemented Lock, which is a sizeof(void*) 9 lock implementation with some nice theoretical properties and good performance. Then I added 10 the ParkingLot abstraction and ByteLock. ParkingLot uses Lock in its implementation. 11 ByteLock uses ParkingLot to create a sizeof(char) lock implementation that performs like 12 Lock. 13 14 It turns out that ByteLock is always at least as good as Lock, and sometimes a lot better: 15 it requires 8x less memory on 64-bit systems. It's hard to construct a benchmark where 16 ByteLock is significantly slower than Lock, and when you do construct such a benchmark, 17 tweaking it a bit can also create a scenario where ByteLock is significantly faster than 18 Lock. 19 20 So, the thing that we call "Lock" should really use ByteLock's algorithm, since it is more 21 compact and just as fast. That's what this patch does. 22 23 But we still need to keep the old Lock algorithm, because it's used to implement ParkingLot, 24 which in turn is used to implement ByteLock. So this patch does this transformation: 25 26 - Move the algorithm in Lock into files called WordLock.h|cpp. Make ParkingLot use 27 WordLock. 28 29 - Move the algorithm in ByteLock into Lock.h|cpp. Make everyone who used ByteLock use Lock 30 instead. All other users of Lock now get the byte-sized lock implementation. 31 32 - Remove the old ByteLock files. 33 34 * WTF.vcxproj/WTF.vcxproj: 35 * WTF.xcodeproj/project.pbxproj: 36 * benchmarks/LockSpeedTest.cpp: 37 (main): 38 * wtf/WordLock.cpp: Added. 39 (WTF::WordLock::lockSlow): 40 (WTF::WordLock::unlockSlow): 41 * wtf/WordLock.h: Added. 
42 (WTF::WordLock::WordLock): 43 (WTF::WordLock::lock): 44 (WTF::WordLock::unlock): 45 (WTF::WordLock::isHeld): 46 (WTF::WordLock::isLocked): 47 * wtf/ByteLock.cpp: Removed. 48 * wtf/ByteLock.h: Removed. 49 * wtf/CMakeLists.txt: 50 * wtf/Lock.cpp: 51 (WTF::LockBase::lockSlow): 52 (WTF::LockBase::unlockSlow): 53 * wtf/Lock.h: 54 (WTF::LockBase::lock): 55 (WTF::LockBase::unlock): 56 (WTF::LockBase::isHeld): 57 (WTF::LockBase::isLocked): 58 (WTF::Lock::Lock): 59 * wtf/ParkingLot.cpp: 60 1 61 2015-08-11 Filip Pizlo <fpizlo@apple.com> 2 62 -
trunk/Source/WTF/WTF.vcxproj/WTF.vcxproj
r188291 r188323 1 <?xml version="1.0" encoding="utf-8"?>1 <?xml version="1.0" encoding="utf-8"?> 2 2 <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> 3 3 <ItemGroup Label="ProjectConfigurations"> … … 54 54 <ClCompile Include="..\wtf\Assertions.cpp" /> 55 55 <ClCompile Include="..\wtf\BitVector.cpp" /> 56 <ClCompile Include="..\wtf\ByteLock.cpp" />57 56 <ClCompile Include="..\wtf\CompilationThread.cpp" /> 58 57 <ClCompile Include="..\wtf\CryptographicUtilities.cpp" /> … … 156 155 <ClCompile Include="..\wtf\win\WorkQueueWin.cpp" /> 157 156 <ClCompile Include="..\wtf\win\WTFDLL.cpp" /> 157 <ClCompile Include="..\wtf\WordLock.cpp" /> 158 158 <ClCompile Include="..\wtf\WorkQueue.cpp" /> 159 159 <ClCompile Include="..\wtf\WTFThreadData.cpp" /> … … 174 174 <ClInclude Include="..\wtf\BloomFilter.h" /> 175 175 <ClInclude Include="..\wtf\BumpPointerAllocator.h" /> 176 <ClInclude Include="..\wtf\ByteLock.h" />177 176 <ClInclude Include="..\wtf\CheckedArithmetic.h" /> 178 177 <ClInclude Include="..\wtf\CheckedBoolean.h" /> … … 315 314 <ClInclude Include="..\wtf\win\GDIObject.h" /> 316 315 <ClInclude Include="..\wtf\win\WorkItemWin.h" /> 316 <ClInclude Include="..\wtf\WordLock.h" /> 317 317 <ClInclude Include="..\wtf\WorkQueue.h" /> 318 318 <ClInclude Include="..\wtf\WTFThreadData.h" /> -
trunk/Source/WTF/WTF.xcodeproj/project.pbxproj
r188301 r188323 25 25 0F2B66A617B6B4FB00A7AE3F /* DeferrableRefCounted.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F2B66A417B6B4F700A7AE3F /* DeferrableRefCounted.h */; }; 26 26 0F2B66A717B6B4FD00A7AE3F /* FlipBytes.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F2B66A517B6B4F700A7AE3F /* FlipBytes.h */; }; 27 0F824A661B7443A0002E345D /* ByteLock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F824A621B7443A0002E345D /* ByteLock.cpp */; };28 0F824A671B7443A0002E345D /* ByteLock.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F824A631B7443A0002E345D /* ByteLock.h */; };29 27 0F824A681B7443A0002E345D /* ParkingLot.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0F824A641B7443A0002E345D /* ParkingLot.cpp */; }; 30 28 0F824A691B7443A0002E345D /* ParkingLot.h in Headers */ = {isa = PBXBuildFile; fileRef = 0F824A651B7443A0002E345D /* ParkingLot.h */; }; … … 47 45 0FE1646A1B6FFC9600400E7C /* Lock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0FE164681B6FFC9600400E7C /* Lock.cpp */; }; 48 46 0FE1646B1B6FFC9600400E7C /* Lock.h in Headers */ = {isa = PBXBuildFile; fileRef = 0FE164691B6FFC9600400E7C /* Lock.h */; }; 47 0FE4479C1B7AAA03009498EB /* WordLock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0FE4479A1B7AAA03009498EB /* WordLock.cpp */; }; 48 0FE4479D1B7AAA03009498EB /* WordLock.h in Headers */ = {isa = PBXBuildFile; fileRef = 0FE4479B1B7AAA03009498EB /* WordLock.h */; }; 49 49 0FED67B61B22D4D80066CE15 /* TinyPtrSet.h in Headers */ = {isa = PBXBuildFile; fileRef = 0FED67B51B22D4D80066CE15 /* TinyPtrSet.h */; }; 50 50 14022F4118F5C3FC007FF0EB /* libbmalloc.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 14022F4018F5C3FC007FF0EB /* libbmalloc.a */; }; … … 312 312 0F2B66A517B6B4F700A7AE3F /* FlipBytes.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = FlipBytes.h; sourceTree = "<group>"; }; 313 313 0F300B7D18AB48B400A6D72E /* HashMethod.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = 
HashMethod.h; sourceTree = "<group>"; }; 314 0F824A621B7443A0002E345D /* ByteLock.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ByteLock.cpp; sourceTree = "<group>"; };315 0F824A631B7443A0002E345D /* ByteLock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ByteLock.h; sourceTree = "<group>"; };316 314 0F824A641B7443A0002E345D /* ParkingLot.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParkingLot.cpp; sourceTree = "<group>"; }; 317 315 0F824A651B7443A0002E345D /* ParkingLot.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParkingLot.h; sourceTree = "<group>"; }; … … 334 332 0FE164681B6FFC9600400E7C /* Lock.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Lock.cpp; sourceTree = "<group>"; }; 335 333 0FE164691B6FFC9600400E7C /* Lock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Lock.h; sourceTree = "<group>"; }; 334 0FE4479A1B7AAA03009498EB /* WordLock.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = WordLock.cpp; sourceTree = "<group>"; }; 335 0FE4479B1B7AAA03009498EB /* WordLock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WordLock.h; sourceTree = "<group>"; }; 336 336 0FED67B51B22D4D80066CE15 /* TinyPtrSet.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TinyPtrSet.h; sourceTree = "<group>"; }; 337 337 14022F4018F5C3FC007FF0EB /* libbmalloc.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = libbmalloc.a; sourceTree = BUILT_PRODUCTS_DIR; }; … … 724 724 A8A47265151A825A004123FF /* BloomFilter.h */, 725 725 A8A47267151A825A004123FF /* BumpPointerAllocator.h */, 726 0F824A621B7443A0002E345D /* ByteLock.cpp */,727 
0F824A631B7443A0002E345D /* ByteLock.h */,728 726 EB95E1EF161A72410089A2F5 /* ByteOrder.h */, 729 727 A8A4726A151A825A004123FF /* CheckedArithmetic.h */, … … 893 891 A8A47372151A825B004123FF /* VMTags.h */, 894 892 974CFC8D16A4F327006D5404 /* WeakPtr.h */, 893 0FE4479A1B7AAA03009498EB /* WordLock.cpp */, 894 0FE4479B1B7AAA03009498EB /* WordLock.h */, 895 895 E4A0AD371A96245500536DF6 /* WorkQueue.cpp */, 896 896 E4A0AD381A96245500536DF6 /* WorkQueue.h */, … … 1167 1167 A8A47416151A825B004123FF /* RandomNumberSeed.h in Headers */, 1168 1168 0F87105A16643F190090B0AD /* RawPointer.h in Headers */, 1169 0F824A671B7443A0002E345D /* ByteLock.h in Headers */,1170 1169 A8A47417151A825B004123FF /* RedBlackTree.h in Headers */, 1171 1170 A8A47418151A825B004123FF /* RefCounted.h in Headers */, … … 1221 1220 A8A47455151A825B004123FF /* ThreadSpecific.h in Headers */, 1222 1221 149EF16316BBFE0D000A4331 /* TriState.h in Headers */, 1222 0FE4479D1B7AAA03009498EB /* WordLock.h in Headers */, 1223 1223 A8A4746D151A825B004123FF /* UnionFind.h in Headers */, 1224 1224 A8A4746A151A825B004123FF /* UTF8.h in Headers */, … … 1338 1338 A8A47386151A825B004123FF /* Assertions.cpp in Sources */, 1339 1339 A8A47435151A825B004123FF /* AtomicString.cpp in Sources */, 1340 0F824A661B7443A0002E345D /* ByteLock.cpp in Sources */,1341 1340 9BC70F05176C379D00101DEC /* AtomicStringTable.cpp in Sources */, 1342 1341 A5BA15FB182435A600A82E69 /* StringCF.cpp in Sources */, … … 1401 1400 FEDACD3D1630F83F00C69634 /* StackStats.cpp in Sources */, 1402 1401 A8A4743C151A825B004123FF /* StringBuilder.cpp in Sources */, 1402 0FE4479C1B7AAA03009498EB /* WordLock.cpp in Sources */, 1403 1403 A8A47440151A825B004123FF /* StringImpl.cpp in Sources */, 1404 1404 A5BA15FC182435A600A82E69 /* StringImplCF.cpp in Sources */, -
trunk/Source/WTF/benchmarks/LockSpeedTest.cpp
r188280 r188323 27 27 28 28 #include <unistd.h> 29 #include <wtf/ByteLock.h>30 29 #include <wtf/CurrentTime.h> 31 30 #include <wtf/Lock.h> … … 34 33 #include <wtf/Threading.h> 35 34 #include <wtf/ThreadingPrimitives.h> 35 #include <wtf/WordLock.h> 36 36 37 37 namespace { … … 45 45 NO_RETURN void usage() 46 46 { 47 printf("Usage: LockSpeedTest spinlock| lock|bytelock|mutex|all <num thread groups> <num threads per group> <work per critical section> <num noise threads> <num iterations>\n");47 printf("Usage: LockSpeedTest spinlock|wordlock|lock|bytelock|mutex|all <num thread groups> <num threads per group> <work per critical section> <num noise threads> <num iterations>\n"); 48 48 exit(1); 49 49 } … … 123 123 didRun = true; 124 124 } 125 if (!strcmp(argv[1], "wordlock") || !strcmp(argv[1], "all")) { 126 runBenchmark<WordLock>("WTF WordLock"); 127 didRun = true; 128 } 125 129 if (!strcmp(argv[1], "lock") || !strcmp(argv[1], "all")) { 126 130 runBenchmark<Lock>("WTF Lock"); 127 didRun = true;128 }129 if (!strcmp(argv[1], "bytelock") || !strcmp(argv[1], "all")) {130 runBenchmark<ByteLock>("WTF ByteLock");131 131 didRun = true; 132 132 } -
trunk/Source/WTF/wtf/CMakeLists.txt
r188280 r188323 8 8 Bitmap.h 9 9 BumpPointerAllocator.h 10 ByteLock.h11 10 ByteOrder.h 12 11 CompilationThread.h … … 105 104 WTFThreadData.h 106 105 WeakPtr.h 106 WordLock.h 107 107 WorkQueue.h 108 108 dtoa.h … … 147 147 Atomics.cpp 148 148 BitVector.cpp 149 ByteLock.cpp150 149 CompilationThread.cpp 151 150 CryptographicUtilities.cpp … … 184 183 Threading.cpp 185 184 WTFThreadData.cpp 185 WordLock.cpp 186 186 WorkQueue.cpp 187 187 dtoa.cpp -
trunk/Source/WTF/wtf/Lock.cpp
r188169 r188323 28 28 29 29 #include "DataLog.h" 30 #include "ParkingLot.h" 30 31 #include "StringPrintStream.h" 31 #include "ThreadSpecific.h"32 32 #include "ThreadingPrimitives.h" 33 #include <condition_variable>34 #include <mutex>35 33 #include <thread> 36 34 37 35 namespace WTF { 38 36 39 namespace { 40 41 // This data structure serves three purposes: 42 // 43 // 1) A parking mechanism for threads that go to sleep. That involves just a system mutex and 44 // condition variable. 45 // 46 // 2) A queue node for when a thread is on some Lock's queue. 47 // 48 // 3) The queue head. This is kind of funky. When a thread is the head of a queue, it also serves as 49 // the basic queue bookkeeping data structure. When a thread is dequeued, the next thread in the 50 // queue takes on the queue head duties. 51 struct ThreadData { 52 // The parking mechanism. 53 bool shouldPark { false }; 54 std::mutex parkingLock; 55 std::condition_variable parkingCondition; 56 57 // The queue node. 58 ThreadData* nextInQueue { nullptr }; 59 60 // The queue itself. 61 ThreadData* queueTail { nullptr }; 62 }; 63 64 ThreadSpecific<ThreadData>* threadData; 65 66 ThreadData* myThreadData() 67 { 68 static std::once_flag initializeOnce; 69 std::call_once( 70 initializeOnce, 71 [] { 72 threadData = new ThreadSpecific<ThreadData>(); 73 }); 74 75 return *threadData; 76 } 77 78 } // anonymous namespace 37 static const bool verbose = false; 79 38 80 39 void LockBase::lockSlow() … … 86 45 87 46 for (;;) { 88 uintptr_t currentWordValue = m_word.load(); 89 90 if (!(currentWordValue & isLockedBit)) { 91 // It's not possible for someone to hold the queue lock while the lock itself is no longer 92 // held, since we will only attempt to acquire the queue lock when the lock is held and 93 // the queue lock prevents unlock. 94 ASSERT(!(currentWordValue & isQueueLockedBit)); 95 if (m_word.compareExchangeWeak(currentWordValue, currentWordValue | isLockedBit)) { 96 // Success! We acquired the lock. 
97 return; 98 } 99 } 47 uint8_t currentByteValue = m_byte.load(); 48 if (verbose) 49 dataLog(toString(currentThread(), ": locking with ", currentByteValue, "\n")); 100 50 101 // If there is no queue and we haven't spun too much, we can just try to spin around again. 102 if (!(currentWordValue & ~queueHeadMask) && spinCount < spinLimit) { 51 // We allow ourselves to barge in. 52 if (!(currentByteValue & isHeldBit) 53 && m_byte.compareExchangeWeak(currentByteValue, currentByteValue | isHeldBit)) 54 return; 55 56 // If there is nobody parked and we haven't spun too much, we can just try to spin around. 57 if (!(currentByteValue & hasParkedBit) && spinCount < spinLimit) { 103 58 spinCount++; 104 59 std::this_thread::yield(); … … 106 61 } 107 62 108 // Need to put ourselves on the queue. Create the queue if one does not exist. This requires 109 // owning the queue for a little bit. The lock that controls the queue is itself a spinlock. 110 // But before we acquire the queue spinlock, we make sure that we have a ThreadData for this 111 // thread. 112 ThreadData* me = myThreadData(); 113 ASSERT(!me->shouldPark); 114 ASSERT(!me->nextInQueue); 115 ASSERT(!me->queueTail); 63 // Need to park. We do this by setting the parked bit first, and then parking. We spin around 64 // if the parked bit wasn't set and we failed at setting it. 65 if (!(currentByteValue & hasParkedBit) 66 && !m_byte.compareExchangeWeak(currentByteValue, currentByteValue | hasParkedBit)) 67 continue; 116 68 117 // Reload the current word value, since some time may have passed.118 currentWordValue = m_word.load();69 // We now expect the value to be isHeld|hasParked. So long as that's the case, we can park. 70 ParkingLot::compareAndPark(&m_byte, isHeldBit | hasParkedBit); 119 71 120 // We proceed only if the queue lock is not held, the Lock is held, and we succeed in 121 // acquiring the queue lock. 
122 if ((currentWordValue & isQueueLockedBit) 123 || !(currentWordValue & isLockedBit) 124 || !m_word.compareExchangeWeak(currentWordValue, currentWordValue | isQueueLockedBit)) { 125 std::this_thread::yield(); 126 continue; 127 } 128 129 me->shouldPark = true; 130 131 // We own the queue. Nobody can enqueue or dequeue until we're done. Also, it's not possible 132 // to release the Lock while we hold the queue lock. 133 ThreadData* queueHead = bitwise_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 134 if (queueHead) { 135 // Put this thread at the end of the queue. 136 queueHead->queueTail->nextInQueue = me; 137 queueHead->queueTail = me; 138 139 // Release the queue lock. 140 currentWordValue = m_word.load(); 141 ASSERT(currentWordValue & ~queueHeadMask); 142 ASSERT(currentWordValue & isQueueLockedBit); 143 ASSERT(currentWordValue & isLockedBit); 144 m_word.store(currentWordValue & ~isQueueLockedBit); 145 } else { 146 // Make this thread be the queue-head. 147 queueHead = me; 148 me->queueTail = me; 149 150 // Release the queue lock and install ourselves as the head. No need for a CAS loop, since 151 // we own the queue lock. 152 currentWordValue = m_word.load(); 153 ASSERT(~(currentWordValue & ~queueHeadMask)); 154 ASSERT(currentWordValue & isQueueLockedBit); 155 ASSERT(currentWordValue & isLockedBit); 156 uintptr_t newWordValue = currentWordValue; 157 newWordValue |= bitwise_cast<uintptr_t>(queueHead); 158 newWordValue &= ~isQueueLockedBit; 159 m_word.store(newWordValue); 160 } 161 162 // At this point everyone who acquires the queue lock will see me on the queue, and anyone who 163 // acquires me's lock will see that me wants to park. Note that shouldPark may have been 164 // cleared as soon as the queue lock was released above, but it will happen while the 165 // releasing thread holds me's parkingLock. 
166 167 { 168 std::unique_lock<std::mutex> locker(me->parkingLock); 169 while (me->shouldPark) 170 me->parkingCondition.wait(locker); 171 } 172 173 ASSERT(!me->shouldPark); 174 ASSERT(!me->nextInQueue); 175 ASSERT(!me->queueTail); 176 177 // Now we can loop around and try to acquire the lock again. 72 // We have awoken, or we never parked because the byte value changed. Either way, we loop 73 // around and try again. 178 74 } 179 75 } … … 181 77 void LockBase::unlockSlow() 182 78 { 183 // The fast path can fail either because of spurious weak CAS failure, or because someone put a 184 // thread on the queue, or the queue lock is held. If the queue lock is held, it can only be 185 // because someone *will* enqueue a thread onto the queue. 186 187 // Acquire the queue lock, or release the lock. This loop handles both lock release in case the 188 // fast path's weak CAS spuriously failed and it handles queue lock acquisition if there is 189 // actually something interesting on the queue. 79 // Release the lock while finding out if someone is parked. Note that as soon as we do this, 80 // someone might barge in. 81 uint8_t oldByteValue; 190 82 for (;;) {
213 ASSERT(currentWordValue & ~queueHeadMask); 214 215 if (m_word.compareExchangeWeak(currentWordValue, currentWordValue | isQueueLockedBit)) 83 oldByteValue = m_byte.load(); 84 if (verbose) 85 dataLog(toString(currentThread(), ": unlocking with ", oldByteValue, "\n")); 86 ASSERT(oldByteValue & isHeldBit); 87 if (m_byte.compareExchangeWeak(oldByteValue, 0)) 216 88 break; 217 89 } 218 90 219 uintptr_t currentWordValue = m_word.load(); 220 221 // After we acquire the queue lock, the Lock must still be held and the queue must be 222 // non-empty. The queue must be non-empty since only the lockSlow() method could have held the 223 // queue lock and if it did then it only releases it after putting something on the queue. 224 ASSERT(currentWordValue & isLockedBit); 225 ASSERT(currentWordValue & isQueueLockedBit); 226 ThreadData* queueHead = bitwise_cast<ThreadData*>(currentWordValue & ~queueHeadMask); 227 ASSERT(queueHead); 91 // Note that someone could try to park right now. If that happens, they will return immediately 92 // because our parking predicate is that m_byte == isHeldBit | hasParkedBit, but we've already set 93 // m_byte = 0. 228 94 229 ThreadData* newQueueHead = queueHead->nextInQueue; 230 // Either this was the only thread on the queue, in which case we delete the queue, or there 231 // are still more threads on the queue, in which case we create a new queue head. 232 if (newQueueHead) 233 newQueueHead->queueTail = queueHead->queueTail; 234 235 // Change the queue head, possibly removing it if newQueueHead is null. No need for a CAS loop, 236 // since we hold the queue lock and the lock itself so nothing about the lock can change right 237 // now. 
238 currentWordValue = m_word.load(); 239 ASSERT(currentWordValue & isLockedBit); 240 ASSERT(currentWordValue & isQueueLockedBit); 241 ASSERT((currentWordValue & ~queueHeadMask) == bitwise_cast<uintptr_t>(queueHead)); 242 uintptr_t newWordValue = currentWordValue; 243 newWordValue &= ~isLockedBit; // Release the Lock. 244 newWordValue &= ~isQueueLockedBit; // Release the queue lock. 245 newWordValue &= queueHeadMask; // Clear out the old queue head. 246 newWordValue |= bitwise_cast<uintptr_t>(newQueueHead); // Install new queue head. 247 m_word.store(newWordValue); 248 249 // Now the lock is available for acquisition. But we just have to wake up the old queue head. 250 // After that, we're done! 251 252 queueHead->nextInQueue = nullptr; 253 queueHead->queueTail = nullptr; 254 255 // We do this carefully because this may run either before or during the parkingLock critical 256 // section in lockSlow(). 257 { 258 std::unique_lock<std::mutex> locker(queueHead->parkingLock); 259 queueHead->shouldPark = false; 260 } 261 // Doesn't matter if we notify_all() or notify_one() here since the only thread that could be 262 // waiting is queueHead. 263 queueHead->parkingCondition.notify_one(); 264 265 // The old queue head can now contend for the lock again. We're done! 95 // If there had been threads parked, unpark all of them. This causes a thundering herd, but while 96 // that is theoretically scary, it's totally fine in WebKit because we usually don't have enough 97 // threads for this to matter. 98 // FIXME: We don't really need this to exhibit thundering herd. We could use unparkOne(), and if 99 // that returns true, just set the parked bit again. If in the process of setting the parked bit 100 // we fail the CAS, then just unpark again. 101 if (oldByteValue & hasParkedBit) 102 ParkingLot::unparkAll(&m_byte); 266 103 } 267 104 -
trunk/Source/WTF/wtf/Lock.h
r188280 r188323 34 34 namespace WTF { 35 35 36 // A Lock is a fully adaptive mutex that gives you the best of SpinLock and Mutex. For small critical 37 // sections (that take nanoseconds), it will usually perform within 2x of a SpinLock in both the 38 // contended and uncontended case. When using a Mutex, such critical sections take up to 100x longer 39 // than Lock in the contended case, or 3x longer than Lock in the uncontended case. For longer 40 // critical sections (that take tens of microseconds), it will perform as well as a Mutex and slightly 41 // better than a SpinLock. But, crucially, a SpinLock will burn up to 90x more time in the kernel for 42 // such critical sections than either Lock or Mutex. Hence, using Lock will make the common case of 43 // locking perform close to SpinLock for any critical section that does more than a few nanoseconds of 44 // work while being as kind to the scheduler for longer critical sections as a Mutex. 45 // 46 // Like SpinLock, Lock takes very little memory - just sizeof(void*), though see a detailed caveat 47 // below. 48 // 49 // Generally, you should use Lock instead of SpinLock because while it penalizes you slightly, you 50 // make up for it by not wasting CPU cycles in case of contention. 51 // 52 // The Lock has the following nice properties: 53 // 54 // - Uncontended fast paths for lock acquisition and lock release that are almost as fast as the 55 // uncontended fast paths of a spinlock. The only overhead is that the spinlock will not CAS on 56 // release, while Lock will CAS. This overhead *can* slow things down for extremely small critical 57 // sections that do little or nothing - it makes them 2x slower since in that case, CAS is the most 58 // expensive instruction and having two of them is twice as bad as just having one. However, this 59 // lock implementation is still almost 3x faster than a platform mutex in those cases. 
It's unlikely 60 // that you'll encounter no-op critical sections, so usually, this lock is better than a spinlock. 61 // 62 // - Contended fast path that attempts to spin and yield for some number of times. For critical 63 // sections that are held only briefly, this allows Lock to perform almost as well as a SpinLock. 64 // SpinLock can still be almost 2x faster than Lock if the critical section is a no-op, but this 65 // advantage diminishes as the critical section grows. 66 // 67 // - Contended slow path that enqueues the contending thread and causes it to wait on a condition 68 // variable until the lock is released. This is the only case in which system mutexes and condition 69 // variables are used. This case is rare and self-limiting: it will only happen when a lock is held 70 // for long enough that spinning some number of times doesn't acquire it. The fact that Lock does 71 // this as a fallback when spinning for some number of times fails means that it will burn 72 // dramatically fewer CPU cycles - for example with 10 threads on an 8 logical CPU machine acquiring 73 // a critical section that takes 50 microseconds, the WTF SpinLock will cause 90x more time to be 74 // spent in the kernel than Lock. 75 // 76 // - Very low memory usage. Each Lock requires only sizeof(void*) memory. When the contended slow 77 // path is activated, Lock only relies on each thread having a preallocated thread-specific data 78 // structure called ThreadData that, together with the Lock itself, is used to build up a thread 79 // queue. So, the total memory usage of all Locks is still bounded by: 80 // 81 // numberOfLocks * sizeof(void*) + numberOfThreads * sizeof(ThreadData) 82 // 83 // Where ThreadData is a decently large data structure, but we will only ever have one per thread, 84 // regardless of the number of Locks in memory. 
Another way to view this is that the worst case 85 // memory usage per Lock is: 86 // 87 // sizeof(void*) + numberOfThreads / numberOfLocks * sizeof(ThreadData) 88 // 89 // So, unless you have a small number of Locks (or, a large number of threads, which is far less 90 // likely), the memory usage per-Lock is still going to be somewhere around sizeof(void*). 91 // 92 // - Barging fast paths. The Lock is tuned for maximum throughput rather than maximum fairness. If 93 // a thread releases a Lock that was contended and had a queue of waiting threads, then it will 94 // wake up the head of the queue, but it will also mark the lock as being available. This means that 95 // some other thread that is just now attempting to acquire the lock may get it before the thread 96 // that got woken up. When a thread barges into the lock, the thread that got woken up will simply 97 // go back to the end of the queue. The barging behavior ends up being probabilistic on most 98 // platforms and even though it may be unfair to some thread at some moment in time, it will rarely 99 // have a long streak of unfairness towards any particular thread: eventually each thread waiting on 100 // the lock will get to have a turn so long as no thread just holds the lock forever. That said, 101 // there *is* a chance of pathologies - users of Lock should not depend on first-in, first-out lock 102 // acquisition order under contention. The same caveat is generally true of SpinLock and platform 103 // mutexes on some platforms. 104 105 // FIXME: We could make this Lock more efficient by using ParkingLot and atomic add instead of CAS. 106 // The lock would be available if the 32-bit lock word <= 1. Locking would atomically add 2 to the 107 // word. The lock would be known to be held if the old value of the word was <= 1. The low bit 108 // just indicates if anyone is waiting. 
If the word was >= 2 after the atomic add, we would go to a 109 // slow path that repeatedly attempts to set the lock word to 3 [sic] from 0 or 1, and parks if that's 110 // not possible. The slow path could also perform defensive fixup that drops the lock value down to 111 // 3 if it was greater than 3, anytime that it needed to go to park. The unlock fast path would 112 // atomically subtract 2. If the decrement operation does not cause the count to zero, it would go to 113 // a slow path. The slow path would unpark one. If unparking returns false, the slow path would 114 // attempt a strong CAS from 1 to 0. It wouldn't do anything if the CAS fails, since the only goal of 115 // that CAS is to tell future unlock fast paths that there is possibly a thread parked. As such the 116 // states of the lock are: 117 // 118 // 0: lock available and nobody waiting 119 // 1: lock available and there may be threads waiting 120 // 2: lock taken and no threads waiting 121 // >=3: lock taken and threads waiting 122 // 123 // It may be possible to design an even better lock implementation based on atomic increments rather 124 // than atomic +=2/-=2. 125 // 126 // Note that because ParkingLot uses this lock internally, we would probably rename this lock 127 // implementation to something like BootstrapLock or even make it part of an anonymous namespace 128 // inside ParkingLot. 129 // 130 // https://bugs.webkit.org/show_bug.cgi?id=147841 36 // This is a fully adaptive mutex that only requires 1 byte of storage. It has fast paths that are 37 // competitive to SpinLock (uncontended locking is inlined and is just a CAS, microcontention is 38 // handled by spinning and yielding), and a slow path that is competitive to Mutex (if a lock cannot 39 // be acquired in a short period of time, the thread is put to sleep until the lock is available 40 // again). It uses less memory than either SpinLock or Mutex. 
131 41 132 42 // This is a struct without a constructor or destructor so that it can be statically initialized. … … 135 45 void lock() 136 46 { 137 if (LIKELY(m_ word.compareExchangeWeak(0, isLockedBit, std::memory_order_acquire))) {47 if (LIKELY(m_byte.compareExchangeWeak(0, isHeldBit, std::memory_order_acquire))) { 138 48 // Lock acquired! 139 49 return; … … 145 55 void unlock() 146 56 { 147 if (LIKELY(m_ word.compareExchangeWeak(isLockedBit, 0, std::memory_order_release))) {148 // Lock released ,and nobody was waiting!57 if (LIKELY(m_byte.compareExchangeWeak(isHeldBit, 0, std::memory_order_release))) { 58 // Lock released and nobody was waiting! 149 59 return; 150 60 } … … 155 65 bool isHeld() const 156 66 { 157 return m_ word.load(std::memory_order_acquire) & isLockedBit;67 return m_byte.load(std::memory_order_acquire) & isHeldBit; 158 68 } 159 69 … … 164 74 165 75 protected: 166 static const uintptr_t isLockedBit = 1; 167 static const uintptr_t isQueueLockedBit = 2; 168 static const uintptr_t queueHeadMask = 3; 76 static const uint8_t isHeldBit = 1; 77 static const uint8_t hasParkedBit = 2; 169 78 170 79 WTF_EXPORT_PRIVATE void lockSlow(); 171 80 WTF_EXPORT_PRIVATE void unlockSlow(); 172 81 173 Atomic<uint ptr_t> m_word;82 Atomic<uint8_t> m_byte; 174 83 }; 175 84 … … 179 88 Lock() 180 89 { 181 m_ word.store(0, std::memory_order_relaxed);90 m_byte.store(0, std::memory_order_relaxed); 182 91 } 183 92 }; -
trunk/Source/WTF/wtf/ParkingLot.cpp
r188280 r188323 29 29 #include "DataLog.h" 30 30 #include "HashFunctions.h" 31 #include "Lock.h"32 31 #include "StringPrintStream.h" 33 32 #include "ThreadSpecific.h" 34 33 #include "ThreadingPrimitives.h" 35 34 #include "Vector.h" 36 35 #include "WordLock.h" 36 36 #include <condition_variable> 37 37 #include <mutex> … … 171 171 // This lock protects the entire bucket. Thou shall not make changes to Bucket without holding 172 172 // this lock. 173 Lock lock;173 WordLock lock; 174 174 175 175 // Put some distance between buckets in memory. This is one of several mitigations against false -
trunk/Tools/ChangeLog
r188311 r188323 1 2015-08-11 Filip Pizlo <fpizlo@apple.com> 2 3 Always use a byte-sized lock implementation 4 https://bugs.webkit.org/show_bug.cgi?id=147908 5 6 Reviewed by Geoffrey Garen. 7 8 All previous tests of Lock are now tests of WordLock. All previous tests of ByteLock are 9 now tests of Lock. 10 11 * TestWebKitAPI/Tests/WTF/Lock.cpp: 12 (TestWebKitAPI::runLockTest): 13 (TestWebKitAPI::TEST): 14 1 15 2015-08-11 Alexey Proskuryakov <ap@apple.com> 2 16 -
trunk/Tools/TestWebKitAPI/Tests/WTF/Lock.cpp
r188309 r188323 25 25 26 26 #include "config.h" 27 #include <wtf/ByteLock.h>28 27 #include <wtf/Lock.h> 29 28 #include <wtf/Threading.h> 30 29 #include <wtf/ThreadingPrimitives.h> 30 #include <wtf/WordLock.h> 31 31 32 32 using namespace WTF; … … 69 69 } 70 70 71 TEST(WTF_WordLock, UncontendedShortSection) 72 { 73 runLockTest<WordLock>(1, 1, 1, 10000000); 74 } 75 76 TEST(WTF_WordLock, UncontendedLongSection) 77 { 78 runLockTest<WordLock>(1, 1, 10000, 1000); 79 } 80 81 TEST(WTF_WordLock, ContendedShortSection) 82 { 83 runLockTest<WordLock>(1, 10, 1, 10000000); 84 } 85 86 TEST(WTF_WordLock, ContendedLongSection) 87 { 88 runLockTest<WordLock>(1, 10, 10000, 10000); 89 } 90 91 TEST(WTF_WordLock, ManyContendedShortSections) 92 { 93 runLockTest<WordLock>(10, 10, 1, 500000); 94 } 95 96 TEST(WTF_WordLock, ManyContendedLongSections) 97 { 98 runLockTest<WordLock>(10, 10, 10000, 1000); 99 } 100 71 101 TEST(WTF_Lock, UncontendedShortSection) 72 102 { … … 99 129 } 100 130 101 TEST(WTF_ ByteLock, UncontendedShortSection)131 TEST(WTF_Lock, SectionAddressCollision) 102 132 { 103 runLockTest<ByteLock>(1, 1, 1, 10000000); 104 } 105 106 TEST(WTF_ByteLock, UncontendedLongSection) 107 { 108 runLockTest<ByteLock>(1, 1, 10000, 1000); 109 } 110 111 TEST(WTF_ByteLock, ContendedShortSection) 112 { 113 runLockTest<ByteLock>(1, 10, 1, 10000000); 114 } 115 116 TEST(WTF_ByteLock, ContendedLongSection) 117 { 118 runLockTest<ByteLock>(1, 10, 10000, 10000); 119 } 120 121 TEST(WTF_ByteLock, ManyContendedShortSections) 122 { 123 runLockTest<ByteLock>(10, 10, 1, 500000); 124 } 125 126 TEST(WTF_ByteLock, ManyContendedLongSections) 127 { 128 runLockTest<ByteLock>(10, 10, 10000, 1000); 129 } 130 131 TEST(WTF_ByteLock, SectionAddressCollision) 132 { 133 runLockTest<ByteLock>(4, 2, 10000, 2000); 133 runLockTest<Lock>(4, 2, 10000, 2000); 134 134 } 135 135
Note: See TracChangeset
for help on using the changeset viewer.