Changeset 199092 in webkit
- Timestamp:
- Apr 5, 2016 8:55:11 PM (8 years ago)
- Location:
- trunk/Source/JavaScriptCore
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r199084 r199092 1 2016-04-05 Saam barati <sbarati@apple.com> 2 3 JSC SamplingProfiler: Use a thread + sleep loop instead of WTF::WorkQueue for taking samples 4 https://bugs.webkit.org/show_bug.cgi?id=154017 5 6 Reviewed by Geoffrey Garen. 7 8 By moving to an explicitly created seperate thread + sample-then-sleep 9 loop, we can remove a lot of the crufty code around WorkQueue. 10 We're also getting sample rates that are much closer to what we're 11 asking the OS for. When the sampling handler was built off of WorkQueue, 12 we'd often get sample rates much higher than the 1ms we asked for. On Kraken, 13 we would average about 1.7ms sample rates, even though we'd ask for a 1ms rate. 14 Now, on Kraken, we're getting about 1.2ms rates. Because we're getting 15 higher rates, this patch is a performance regression. It's slower because 16 we're sampling more frequently. 17 18 Before this patch, the sampling profiler had the following overhead: 19 - 10% on Kraken 20 - 12% on octane 21 - 15% on AsmBench 22 23 With this patch, the sampling profiler has the following overhead: 24 - 16% on Kraken 25 - 17% on Octane 26 - 30% on AsmBench 27 28 Comparatively, this new patch has the following overhead over the old sampling profiler: 29 - 5% on Kraken 30 - 3.5% on Octane 31 - 13% slower on AsmBench 32 33 * inspector/agents/InspectorScriptProfilerAgent.cpp: 34 (Inspector::InspectorScriptProfilerAgent::trackingComplete): 35 * runtime/SamplingProfiler.cpp: 36 (JSC::SamplingProfiler::SamplingProfiler): 37 (JSC::SamplingProfiler::~SamplingProfiler): 38 (JSC::SamplingProfiler::createThreadIfNecessary): 39 (JSC::SamplingProfiler::timerLoop): 40 (JSC::SamplingProfiler::takeSample): 41 (JSC::tryGetBytecodeIndex): 42 (JSC::SamplingProfiler::shutdown): 43 (JSC::SamplingProfiler::start): 44 (JSC::SamplingProfiler::pause): 45 (JSC::SamplingProfiler::noticeCurrentThreadAsJSCExecutionThread): 46 (JSC::SamplingProfiler::noticeJSLockAcquisition): 47 (JSC::SamplingProfiler::noticeVMEntry): 48 (JSC::SamplingProfiler::clearData): 49 (JSC::SamplingProfiler::stop): Deleted. 50 (JSC::SamplingProfiler::dispatchIfNecessary): Deleted. 51 (JSC::SamplingProfiler::dispatchFunction): Deleted. 52 * runtime/SamplingProfiler.h: 53 (JSC::SamplingProfiler::setTimingInterval): 54 (JSC::SamplingProfiler::setStopWatch): 55 * runtime/VM.cpp: 56 (JSC::VM::VM): 57 1 58 2016-04-05 Commit Queue <commit-queue@webkit.org> 2 59 -
trunk/Source/JavaScriptCore/inspector/agents/InspectorScriptProfilerAgent.cpp
r198867 r199092 203 203 RELEASE_ASSERT(samplingProfiler); 204 204 LockHolder locker(samplingProfiler->getLock()); 205 samplingProfiler-> stop(locker);205 samplingProfiler->pause(locker); 206 206 Vector<SamplingProfiler::StackTrace> stackTraces = samplingProfiler->releaseStackTraces(locker); 207 207 Ref<Protocol::ScriptProfiler::Samples> samples = buildSamples(m_environment.scriptDebugServer().vm(), WTFMove(stackTraces)); -
trunk/Source/JavaScriptCore/runtime/SamplingProfiler.cpp
r198867 r199092 187 187 , m_stopwatch(WTFMove(stopwatch)) 188 188 , m_timingInterval(std::chrono::microseconds(1000)) 189 , m_t imerQueue(WorkQueue::create("jsc.sampling-profiler.queue", WorkQueue::Type::Serial, WorkQueue::QOS::UserInteractive))189 , m_threadIdentifier(0) 190 190 , m_jscExecutionThread(nullptr) 191 , m_isActive(false)192 191 , m_isPaused(false) 193 , m_ hasDispatchedFunction(false)192 , m_isShutDown(false) 194 193 { 195 194 if (sReportStats) { … … 199 198 200 199 m_currentFrames.grow(256); 201 202 m_handler = [this] () { 203 LockHolder samplingProfilerLocker(m_lock); 204 if (!m_isActive || !m_jscExecutionThread || m_isPaused) { 205 m_hasDispatchedFunction = false; 206 deref(); 207 return; 208 } 209 210 if (m_vm.entryScope) { 211 double nowTime = m_stopwatch->elapsedTime(); 212 213 LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock()); 214 LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock()); 215 LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock()); 216 217 bool didSuspend = m_jscExecutionThread->suspend(); 218 if (didSuspend) { 219 // While the JSC thread is suspended, we can't do things like malloc because the JSC thread 220 // may be holding the malloc lock. 221 ExecState* callFrame; 222 void* machinePC; 223 bool topFrameIsLLInt = false; 224 void* llintPC; 225 { 226 MachineThreads::Thread::Registers registers; 227 m_jscExecutionThread->getRegisters(registers); 228 callFrame = static_cast<ExecState*>(registers.framePointer()); 229 machinePC = registers.instructionPointer(); 230 llintPC = registers.llintPC(); 231 m_jscExecutionThread->freeRegisters(registers); 200 } 201 202 SamplingProfiler::~SamplingProfiler() 203 { 204 } 205 206 void SamplingProfiler::createThreadIfNecessary(const LockHolder&) 207 { 208 ASSERT(m_lock.isLocked()); 209 210 if (m_threadIdentifier) 211 return; 212 213 RefPtr<SamplingProfiler> profiler = this; 214 m_threadIdentifier = createThread("jsc.sampling-profiler.thread", [profiler] { 215 profiler->timerLoop(); 216 }); 217 } 218 219 void SamplingProfiler::timerLoop() 220 { 221 while (true) { 222 std::chrono::microseconds stackTraceProcessingTime = std::chrono::microseconds(0); 223 { 224 LockHolder locker(m_lock); 225 if (UNLIKELY(m_isShutDown)) 226 return; 227 228 if (!m_isPaused && m_jscExecutionThread) 229 takeSample(locker, stackTraceProcessingTime); 230 231 m_lastTime = m_stopwatch->elapsedTime(); 232 } 233 234 std::this_thread::sleep_for(m_timingInterval - std::min(m_timingInterval, stackTraceProcessingTime)); 235 } 236 } 237 238 void SamplingProfiler::takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime) 239 { 240 ASSERT(m_lock.isLocked()); 241 if (m_vm.entryScope) { 242 double nowTime = m_stopwatch->elapsedTime(); 243 244 LockHolder machineThreadsLocker(m_vm.heap.machineThreads().getLock()); 245 LockHolder codeBlockSetLocker(m_vm.heap.codeBlockSet().getLock()); 246 LockHolder executableAllocatorLocker(m_vm.executableAllocator.getLock()); 247 248 bool didSuspend = m_jscExecutionThread->suspend(); 249 if (didSuspend) { 250 // While the JSC thread is suspended, we can't do things like malloc because the JSC thread 251 // may be holding the malloc lock. 252 ExecState* callFrame; 253 void* machinePC; 254 bool topFrameIsLLInt = false; 255 void* llintPC; 256 { 257 MachineThreads::Thread::Registers registers; 258 m_jscExecutionThread->getRegisters(registers); 259 callFrame = static_cast<ExecState*>(registers.framePointer()); 260 machinePC = registers.instructionPointer(); 261 llintPC = registers.llintPC(); 262 m_jscExecutionThread->freeRegisters(registers); 263 } 264 // FIXME: Lets have a way of detecting when we're parsing code. 265 // https://bugs.webkit.org/show_bug.cgi?id=152761 266 if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) { 267 if (m_vm.isExecutingInRegExpJIT) { 268 // FIXME: We're executing a regexp. Lets gather more intersting data. 269 // https://bugs.webkit.org/show_bug.cgi?id=152729 270 callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame. 232 271 } 233 // FIXME: Lets have a way of detecting when we're parsing code. 234 // https://bugs.webkit.org/show_bug.cgi?id=152761 235 if (m_vm.executableAllocator.isValidExecutableMemory(executableAllocatorLocker, machinePC)) { 236 if (m_vm.isExecutingInRegExpJIT) { 237 // FIXME: We're executing a regexp. Lets gather more intersting data. 238 // https://bugs.webkit.org/show_bug.cgi?id=152729 239 callFrame = m_vm.topCallFrame; // We need to do this or else we'd fail our backtrace validation b/c this isn't a JS frame. 240 } 241 } else if (LLInt::isLLIntPC(machinePC)) { 242 topFrameIsLLInt = true; 243 // We're okay to take a normal stack trace when the PC 244 // is in LLInt code. 245 } else { 246 // We resort to topCallFrame to see if we can get anything 247 // useful. We usually get here when we're executing C code. 248 callFrame = m_vm.topCallFrame; 272 } else if (LLInt::isLLIntPC(machinePC)) { 273 topFrameIsLLInt = true; 274 // We're okay to take a normal stack trace when the PC 275 // is in LLInt code. 276 } else { 277 // We resort to topCallFrame to see if we can get anything 278 // useful. We usually get here when we're executing C code. 279 callFrame = m_vm.topCallFrame; 280 } 281 282 size_t walkSize; 283 bool wasValidWalk; 284 bool didRunOutOfVectorSpace; 285 { 286 FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker); 287 walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace); 288 wasValidWalk = walker.wasValidWalk(); 289 } 290 291 m_jscExecutionThread->resume(); 292 293 auto startTime = std::chrono::steady_clock::now(); 294 // We can now use data structures that malloc, and do other interesting things, again. 295 296 // FIXME: It'd be interesting to take data about the program's state when 297 // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758 298 if (wasValidWalk && walkSize) { 299 if (sReportStats) 300 sNumTotalStackTraces++; 301 Vector<UnprocessedStackFrame> stackTrace; 302 stackTrace.reserveInitialCapacity(walkSize); 303 for (size_t i = 0; i < walkSize; i++) { 304 UnprocessedStackFrame frame = m_currentFrames[i]; 305 stackTrace.uncheckedAppend(frame); 249 306 } 250 307 251 size_t walkSize; 252 bool wasValidWalk; 253 bool didRunOutOfVectorSpace; 254 { 255 FrameWalker walker(callFrame, m_vm, codeBlockSetLocker, machineThreadsLocker); 256 walkSize = walker.walk(m_currentFrames, didRunOutOfVectorSpace); 257 wasValidWalk = walker.wasValidWalk(); 258 } 259 260 m_jscExecutionThread->resume(); 261 262 // We can now use data structures that malloc, and do other interesting things, again. 263 264 // FIXME: It'd be interesting to take data about the program's state when 265 // we fail to take a stack trace: https://bugs.webkit.org/show_bug.cgi?id=152758 266 if (wasValidWalk && walkSize) { 267 if (sReportStats) 268 sNumTotalStackTraces++; 269 Vector<UnprocessedStackFrame> stackTrace; 270 stackTrace.reserveInitialCapacity(walkSize); 271 for (size_t i = 0; i < walkSize; i++) { 272 UnprocessedStackFrame frame = m_currentFrames[i]; 273 stackTrace.uncheckedAppend(frame); 274 } 275 276 m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) }); 277 278 if (didRunOutOfVectorSpace) 279 m_currentFrames.grow(m_currentFrames.size() * 1.25); 280 } 281 } 282 } 283 284 m_lastTime = m_stopwatch->elapsedTime(); 285 286 dispatchFunction(samplingProfilerLocker); 287 }; 288 } 289 290 SamplingProfiler::~SamplingProfiler() 291 { 308 m_unprocessedStackTraces.append(UnprocessedStackTrace { nowTime, machinePC, topFrameIsLLInt, llintPC, WTFMove(stackTrace) }); 309 310 if (didRunOutOfVectorSpace) 311 m_currentFrames.grow(m_currentFrames.size() * 1.25); 312 } 313 314 auto endTime = std::chrono::steady_clock::now(); 315 stackTraceProcessingTime = std::chrono::duration_cast<std::chrono::microseconds>(endTime - startTime); 316 } 317 } 292 318 } 293 319 … … 487 513 void SamplingProfiler::shutdown() 488 514 { 489 stop(); 515 LockHolder locker(m_lock); 516 m_isShutDown = true; 490 517 } 491 518 … … 499 526 { 500 527 ASSERT(m_lock.isLocked()); 501 m_isActive = true; 502 dispatchIfNecessary(locker); 503 } 504 505 void SamplingProfiler::stop() 506 { 507 LockHolder locker(m_lock); 508 stop(locker); 509 } 510 511 void SamplingProfiler::stop(const LockHolder&) 528 m_isPaused = false; 529 createThreadIfNecessary(locker); 530 } 531 532 void SamplingProfiler::pause(const LockHolder&) 512 533 { 513 534 ASSERT(m_lock.isLocked()); 514 m_isActive = false;515 reportStats();516 }517 518 void SamplingProfiler::pause()519 {520 LockHolder locker(m_lock);521 535 m_isPaused = true; 522 536 reportStats(); … … 533 547 LockHolder locker(m_lock); 534 548 noticeCurrentThreadAsJSCExecutionThread(locker); 535 }536 537 void SamplingProfiler::dispatchIfNecessary(const LockHolder& locker)538 {539 if (m_isActive && !m_hasDispatchedFunction && m_jscExecutionThread && m_vm.entryScope) {540 ref(); // Matching deref() is inside m_handler when m_handler stops recursing.541 dispatchFunction(locker);542 }543 }544 545 void SamplingProfiler::dispatchFunction(const LockHolder&)546 {547 m_hasDispatchedFunction = true;548 m_isPaused = false;549 m_lastTime = m_stopwatch->elapsedTime();550 m_timerQueue->dispatchAfter(m_timingInterval, m_handler);551 549 } 552 550 … … 563 561 noticeCurrentThreadAsJSCExecutionThread(locker); 564 562 m_lastTime = m_stopwatch->elapsedTime(); 565 dispatchIfNecessary(locker);563 createThreadIfNecessary(locker); 566 564 } 567 565 -
trunk/Source/JavaScriptCore/runtime/SamplingProfiler.h
r198867 r199092 130 130 JS_EXPORT_PRIVATE void start(); 131 131 void start(const LockHolder&); 132 void stop();133 void stop(const LockHolder&);134 132 Vector<StackTrace> releaseStackTraces(const LockHolder&); 135 133 JS_EXPORT_PRIVATE String stackTracesAsJSON(); … … 138 136 void processUnverifiedStackTraces(); // You should call this only after acquiring the lock. 139 137 void setStopWatch(const LockHolder&, Ref<Stopwatch>&& stopwatch) { m_stopwatch = WTFMove(stopwatch); } 138 void pause(const LockHolder&); 140 139 141 140 private: 142 void dispatchIfNecessary(const LockHolder&);143 void dispatchFunction(const LockHolder&);144 void pause();145 141 void clearData(const LockHolder&); 142 void createThreadIfNecessary(const LockHolder&); 143 void timerLoop(); 144 void takeSample(const LockHolder&, std::chrono::microseconds& stackTraceProcessingTime); 146 145 147 146 VM& m_vm; … … 151 150 std::chrono::microseconds m_timingInterval; 152 151 double m_lastTime; 153 Ref<WorkQueue> m_timerQueue;154 std::function<void ()> m_handler;155 152 Lock m_lock; 153 ThreadIdentifier m_threadIdentifier; 156 154 MachineThreads::Thread* m_jscExecutionThread; 157 bool m_isActive;158 155 bool m_isPaused; 159 bool m_ hasDispatchedFunction;156 bool m_isShutDown; 160 157 HashSet<JSCell*> m_liveCellPointers; 161 158 Vector<UnprocessedStackFrame> m_currentFrames; -
trunk/Source/JavaScriptCore/runtime/VM.cpp
r199076 r199092 319 319 if (Options::useSamplingProfiler()) { 320 320 setShouldBuildPCToCodeOriginMapping(); 321 m_samplingProfiler = adoptRef(new SamplingProfiler(*this, Stopwatch::create())); 321 Ref<Stopwatch> stopwatch = Stopwatch::create(); 322 stopwatch->start(); 323 m_samplingProfiler = adoptRef(new SamplingProfiler(*this, WTFMove(stopwatch))); 322 324 m_samplingProfiler->start(); 323 325 }
Note: See TracChangeset
for help on using the changeset viewer.