Changeset 228306 in webkit
- Timestamp:
- Feb 8, 2018 6:13:01 PM (6 years ago)
- Location:
- trunk/Source
- Files:
-
- 2 added
- 43 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/JavaScriptCore/ChangeLog
r228302 r228306 1 2018-02-08 Filip Pizlo <fpizlo@apple.com> 2 3 Experiment with alternative implementation of memcpy/memset 4 https://bugs.webkit.org/show_bug.cgi?id=182563 5 6 Reviewed by Michael Saboff and Mark Lam. 7 8 This adopts new fastCopy/fastZeroFill calls for calls to memcpy/memset that do not take a 9 constant size argument. 10 11 * assembler/AssemblerBuffer.h: 12 (JSC::AssemblerBuffer::append): 13 * runtime/ArrayBuffer.cpp: 14 (JSC::ArrayBufferContents::tryAllocate): 15 (JSC::ArrayBufferContents::copyTo): 16 (JSC::ArrayBuffer::createInternal): 17 * runtime/ArrayBufferView.h: 18 (JSC::ArrayBufferView::zeroRangeImpl): 19 * runtime/ArrayConventions.cpp: 20 * runtime/ArrayConventions.h: 21 (JSC::clearArray): 22 * runtime/ArrayPrototype.cpp: 23 (JSC::arrayProtoPrivateFuncConcatMemcpy): 24 * runtime/ButterflyInlines.h: 25 (JSC::Butterfly::tryCreate): 26 (JSC::Butterfly::createOrGrowPropertyStorage): 27 (JSC::Butterfly::growArrayRight): 28 (JSC::Butterfly::resizeArray): 29 * runtime/GenericTypedArrayViewInlines.h: 30 (JSC::GenericTypedArrayView<Adaptor>::create): 31 * runtime/JSArray.cpp: 32 (JSC::JSArray::appendMemcpy): 33 (JSC::JSArray::fastSlice): 34 * runtime/JSArrayBufferView.cpp: 35 (JSC::JSArrayBufferView::ConstructionContext::ConstructionContext): 36 * runtime/JSGenericTypedArrayViewInlines.h: 37 (JSC::JSGenericTypedArrayView<Adaptor>::set): 38 * runtime/JSObject.cpp: 39 (JSC::JSObject::constructConvertedArrayStorageWithoutCopyingElements): 40 (JSC::JSObject::shiftButterflyAfterFlattening): 41 * runtime/PropertyTable.cpp: 42 (JSC::PropertyTable::PropertyTable): 43 1 44 2018-02-08 Don Olmstead <don.olmstead@sony.com> 2 45 -
trunk/Source/JavaScriptCore/assembler/AssemblerBuffer.h
r206525 r228306 1 1 /* 2 * Copyright (C) 2008 , 2012, 2014Apple Inc. All rights reserved.2 * Copyright (C) 2008-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 277 277 grow(size); 278 278 279 memcpy(m_storage.buffer() + m_index, data, size);279 fastCopyBytes(m_storage.buffer() + m_index, data, size); 280 280 m_index += size; 281 281 } -
trunk/Source/JavaScriptCore/heap/LargeAllocation.cpp
r227721 r228306 46 46 47 47 // Make sure that the padding does not contain useful things. 48 memset(static_cast<char*>(space) + sizeBeforeDistancing, 0, distancing);48 fastZeroFillBytes(static_cast<char*>(space) + sizeBeforeDistancing, distancing); 49 49 50 50 if (scribbleFreeCells()) -
trunk/Source/JavaScriptCore/heap/MarkedBlock.cpp
r228149 r228306 494 494 return; 495 495 496 memset(&block(), 0, endAtom * atomSize);496 fastZeroFillBytes(&block(), endAtom * atomSize); 497 497 m_securityOriginToken = securityOriginToken; 498 498 } -
trunk/Source/JavaScriptCore/runtime/ArrayBuffer.cpp
r221439 r228306 114 114 115 115 if (policy == ZeroInitialize) 116 memset(m_data.get(), 0, size);116 fastZeroFillBytes(m_data.get(), size); 117 117 118 118 m_sizeInBytes = numElements * elementByteSize; … … 142 142 if (!other.m_data) 143 143 return; 144 memcpy(other.m_data.get(), m_data.get(), m_sizeInBytes);144 fastCopyBytes(other.m_data.get(), m_data.get(), m_sizeInBytes); 145 145 other.m_sizeInBytes = m_sizeInBytes; 146 146 } … … 247 247 ASSERT(!byteLength || source); 248 248 auto buffer = adoptRef(*new ArrayBuffer(WTFMove(contents))); 249 memcpy(buffer->data(), source, byteLength);249 fastCopyBytes(buffer->data(), source, byteLength); 250 250 return buffer; 251 251 } -
trunk/Source/JavaScriptCore/runtime/ArrayBufferView.h
r225123 r228306 216 216 217 217 uint8_t* base = static_cast<uint8_t*>(baseAddress()); 218 memset(base + byteOffset, 0, rangeByteLength);218 fastZeroFillBytes(base + byteOffset, rangeByteLength); 219 219 return true; 220 220 } -
trunk/Source/JavaScriptCore/runtime/ArrayConventions.cpp
r205611 r228306 32 32 33 33 #if USE(JSVALUE64) 34 void clearArrayMemset(WriteBarrier<Unknown>* base, unsigned count)35 {36 #if CPU(X86_64) && COMPILER(GCC_OR_CLANG)37 uint64_t zero = 0;38 asm volatile (39 "rep stosq\n\t"40 : "+D"(base), "+c"(count)41 : "a"(zero)42 : "memory"43 );44 #else // not CPU(X86_64)45 memset(base, 0, count * sizeof(WriteBarrier<Unknown>));46 #endif // generic CPU47 }48 49 34 void clearArrayMemset(double* base, unsigned count) 50 35 { -
trunk/Source/JavaScriptCore/runtime/ArrayConventions.h
r222384 r228306 118 118 119 119 #if USE(JSVALUE64) 120 JS_EXPORT_PRIVATE void clearArrayMemset(WriteBarrier<Unknown>* base, unsigned count);121 120 JS_EXPORT_PRIVATE void clearArrayMemset(double* base, unsigned count); 122 121 #endif // USE(JSVALUE64) … … 125 124 { 126 125 #if USE(JSVALUE64) 127 const unsigned minCountForMemset = 100; 128 if (count >= minCountForMemset) { 129 clearArrayMemset(base, count); 130 return; 131 } 132 #endif 133 126 fastZeroFill(base, count); 127 #else 134 128 for (unsigned i = count; i--;) 135 129 base[i].clear(); 130 #endif 136 131 } 137 132 -
trunk/Source/JavaScriptCore/runtime/ArrayPrototype.cpp
r228266 r228306 1342 1342 if (type == ArrayWithDouble) { 1343 1343 double* buffer = result->butterfly()->contiguousDouble().data(); 1344 memcpy(buffer, firstButterfly->contiguousDouble().data(), sizeof(JSValue) *firstArraySize);1345 memcpy(buffer + firstArraySize, secondButterfly->contiguousDouble().data(), sizeof(JSValue) *secondArraySize);1344 fastCopy(buffer, firstButterfly->contiguousDouble().data(), firstArraySize); 1345 fastCopy(buffer + firstArraySize, secondButterfly->contiguousDouble().data(), secondArraySize); 1346 1346 } else if (type != ArrayWithUndecided) { 1347 1347 WriteBarrier<Unknown>* buffer = result->butterfly()->contiguous().data(); … … 1349 1349 auto copy = [&] (unsigned offset, void* source, unsigned size, IndexingType type) { 1350 1350 if (type != ArrayWithUndecided) { 1351 memcpy(buffer + offset, source, sizeof(JSValue) *size);1351 fastCopy(buffer + offset, static_cast<WriteBarrier<Unknown>*>(source), size); 1352 1352 return; 1353 1353 } 1354 1354 1355 for (unsigned i = size; i--;) 1356 buffer[i + offset].clear(); 1355 clearArray(buffer + offset, size); 1357 1356 }; 1358 1357 -
trunk/Source/JavaScriptCore/runtime/ButterflyInlines.h
r227617 r228306 1 1 /* 2 * Copyright (C) 2012-201 7Apple Inc. All rights reserved.2 * Copyright (C) 2012-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 94 94 if (hasIndexingHeader) 95 95 *result->indexingHeader() = indexingHeader; 96 memset(result->propertyStorage() - propertyCapacity, 0, propertyCapacity * sizeof(EncodedJSValue));96 fastZeroFill(result->propertyStorage() - propertyCapacity, propertyCapacity); 97 97 return result; 98 98 } … … 130 130 Butterfly* result = createUninitialized( 131 131 vm, intendedOwner, preCapacity, newPropertyCapacity, hasIndexingHeader, indexingPayloadSizeInBytes); 132 memcpy(132 fastCopyBytes( 133 133 result->propertyStorage() - oldPropertyCapacity, 134 134 oldButterfly->propertyStorage() - oldPropertyCapacity, 135 135 totalSize(0, oldPropertyCapacity, hasIndexingHeader, indexingPayloadSizeInBytes)); 136 memset(136 fastZeroFill( 137 137 result->propertyStorage() - newPropertyCapacity, 138 0, 139 (newPropertyCapacity - oldPropertyCapacity) * sizeof(EncodedJSValue)); 138 newPropertyCapacity - oldPropertyCapacity); 140 139 return result; 141 140 } … … 169 168 if (!newBase) 170 169 return nullptr; 171 // FIXME: This probably shouldn't be a memcpy. 172 memcpy(newBase, theBase, oldSize); 170 fastCopyBytes(newBase, theBase, oldSize); 173 171 return fromBase(newBase, 0, propertyCapacity); 174 172 } … … 200 198 totalSize(0, propertyCapacity, oldHasIndexingHeader, oldIndexingPayloadSizeInBytes), 201 199 totalSize(0, propertyCapacity, newHasIndexingHeader, newIndexingPayloadSizeInBytes)); 202 memcpy(to, from, size);200 fastCopyBytes(to, from, size); 203 201 return result; 204 202 } -
trunk/Source/JavaScriptCore/runtime/GenericTypedArrayViewInlines.h
r212535 r228306 1 1 /* 2 * Copyright (C) 2013 , 2016Apple Inc. All rights reserved.2 * Copyright (C) 2013-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 53 53 { 54 54 RefPtr<GenericTypedArrayView> result = create(length); 55 memcpy(result->data(), array, length * sizeof(typename Adaptor::Type));55 fastCopy(result->data(), array, length); 56 56 return result; 57 57 } -
trunk/Source/JavaScriptCore/runtime/JSArray.cpp
r227906 r228306 554 554 } 555 555 } else if (type == ArrayWithDouble) 556 memcpy(butterfly()->contiguousDouble().data() + startIndex, otherArray->butterfly()->contiguousDouble().data(), sizeof(JSValue) *otherLength);556 fastCopy(butterfly()->contiguousDouble().data() + startIndex, otherArray->butterfly()->contiguousDouble().data(), otherLength); 557 557 else 558 memcpy(butterfly()->contiguous().data() + startIndex, otherArray->butterfly()->contiguous().data(), sizeof(JSValue) *otherLength);558 fastCopy(butterfly()->contiguous().data() + startIndex, otherArray->butterfly()->contiguous().data(), otherLength); 559 559 560 560 return true; … … 762 762 auto& resultButterfly = *resultArray->butterfly(); 763 763 if (arrayType == ArrayWithDouble) 764 memcpy(resultButterfly.contiguousDouble().data(), butterfly()->contiguousDouble().data() + startIndex, sizeof(JSValue) *count);764 fastCopy(resultButterfly.contiguousDouble().data(), butterfly()->contiguousDouble().data() + startIndex, count); 765 765 else 766 memcpy(resultButterfly.contiguous().data(), butterfly()->contiguous().data() + startIndex, sizeof(JSValue) *count);766 fastCopy(resultButterfly.contiguous().data(), butterfly()->contiguous().data() + startIndex, count); 767 767 resultButterfly.setPublicLength(count); 768 768 -
trunk/Source/JavaScriptCore/runtime/JSArrayBufferView.cpp
r227874 r228306 95 95 return; 96 96 if (mode == ZeroFill) 97 memset(m_vector.get(), 0, size);97 fastZeroFillBytes(m_vector.get(), size); 98 98 99 99 vm.heap.reportExtraMemoryAllocated(static_cast<size_t>(length) * elementSize); -
trunk/Source/JavaScriptCore/runtime/JSGenericTypedArrayViewInlines.h
r227874 r228306 247 247 const ClassInfo* ci = object->classInfo(vm); 248 248 if (ci->typedArrayStorageType == Adaptor::typeValue) { 249 // The super fast case: we can just mem cpysince we're the same type.249 // The super fast case: we can just memmove since we're the same type. 250 250 JSGenericTypedArrayView* other = jsCast<JSGenericTypedArrayView*>(object); 251 251 length = std::min(length, other->length()); -
trunk/Source/JavaScriptCore/runtime/JSObject.cpp
r227906 r228306 1179 1179 vm, this, 0, propertyCapacity, true, ArrayStorage::sizeFor(neededLength)); 1180 1180 1181 memcpy(1181 fastCopy( 1182 1182 newButterfly->propertyStorage() - propertySize, 1183 1183 m_butterfly->propertyStorage() - propertySize, 1184 propertySize * sizeof(EncodedJSValue));1184 propertySize); 1185 1185 1186 1186 ArrayStorage* newStorage = newButterfly->arrayStorage(); … … 3581 3581 void* newBase = newButterfly->base(0, outOfLineCapacityAfter); 3582 3582 3583 memcpy(newBase, currentBase, Butterfly::totalSize(0, outOfLineCapacityAfter, hasIndexingHeader, indexingPayloadSizeInBytes));3583 fastCopyBytes(newBase, currentBase, Butterfly::totalSize(0, outOfLineCapacityAfter, hasIndexingHeader, indexingPayloadSizeInBytes)); 3584 3584 3585 3585 setButterfly(vm, newButterfly); -
trunk/Source/JavaScriptCore/runtime/PropertyTable.cpp
r217108 r228306 75 75 ASSERT(isPowerOf2(m_indexSize)); 76 76 77 memcpy(m_index, other.m_index, dataSize());77 fastCopyBytes(m_index, other.m_index, dataSize()); 78 78 79 79 iterator end = this->end(); -
trunk/Source/WTF/ChangeLog
r228260 r228306 1 2018-02-08 Filip Pizlo <fpizlo@apple.com> 2 3 Experiment with alternative implementation of memcpy/memset 4 https://bugs.webkit.org/show_bug.cgi?id=182563 5 6 Reviewed by Michael Saboff and Mark Lam. 7 8 Adds a faster x86_64-specific implementation of memcpy and memset. These versions go by 9 different names than memcpy/memset and have a different API: 10 11 WTF::fastCopy<T>(T* dst, T* src, size_t N): copies N values of type T from src to dst. 12 WTF::fastZeroFill(T* dst, size_t N): writes N * sizeof(T) zeroes to dst. 13 14 There are also *Bytes variants that take void* for dst and src and size_t numBytes. Those are 15 most appropriate in places where the code is already computing bytes. 16 17 These will just call memcpy/memset on platforms where the optimized versions are not supported. 18 19 These new functions are not known to the compiler to be memcpy/memset. This has the effect that 20 the compiler will not try to replace them with anything else. This could be good or bad: 21 22 - It's *good* if the size is *not known* at compile time. In that case, by my benchmarks, these 23 versions are faster than either the memcpy/memset call or whatever else the compiler could 24 emit. This is because of a combination of inlining and the algorithm itself (see below). 25 26 - It's *bad* if the size is *known* at compile time. In that case, the compiler could 27 potentially emit a fully unrolled memcpy/memset. That might not happen if the size is large 28 (even if it's known), but in this patch I avoid replacing any memcpy/memset calls when the 29 size is a constant. In particular, this totally avoids the call overhead -- if the size is 30 small, then the compiler will emit a nice inlined copy or set. If the size is large, then the 31 most optimal thing to do is emit the shortest piece of code possible, and that's a call to 32 memcpy/memset. 33 34 It's unfortunate that you have to choose between them on your own. 
One way to avoid that might 35 have been to override the memcpy/memset symbols, so that the compiler can still do its 36 reasoning. But that's not quite right, since then we would lose inlining in the unknown-size 37 case. Also, it's possible that for some unknown-size cases, the compiler could choose to emit 38 something on its own because it might think that some property of aliasing or alignment could 39 help it. I think it's a bit better to use our own copy/set implementations even in those cases. 40 Another way that I tried avoiding this is to detect inside fastCopy/fastZeroFill if the size is 41 constant. But there is no good way to do that in C++. There is a builtin for doing that inside a 42 macro, but that feels janky, so I didn't want to do it in this patch. 43 44 The reason why these new fastCopy/fastZeroFill functions are faster is that: 45 46 - They can be inlined. There is no function call. Only a few registers get clobbered. So, the 47 impact on the quality of the code surrounding the memcpy/memset is smaller. 48 49 - They use type information to select the implementation. For sizes that are multiples of 2, 4, 50 or 8, the resulting code performs dramatically better on small arrays than memcpy because it 51 uses fewer cycles. The difference is greatest for 2 and 4 byte types, since memcpy usually 52 handles small arrays by tiering from an 8-byte word copy loop to a byte copy loop. So, for 2 53 or 4 byte arrays, we use an algorithm that tiers from 8-byte word down to a 2-byte or 4-byte 54 (depending on type) copy loop. So, for example, when copying a 16-bit string that has 1, 2, or 55 3 characters, this means doing 1, 2, or 3 word copies rather than 2, 4, or 6 byte copies. For 56 8-byte types, the resulting savings are mainly that there is no check to see if a tier-down to 57 the byte-copy loop is needed -- so really that means reducing code size. 
1-byte types don't 58 get this inherent advantage over memcpy/memset, but they still benefit from all of the other 59 advantages of these functions. Of course, this advantage isn't inherent to our approach. The 60 compiler could also notice that the arguments to memcpy/memset have some alignment properties. 61 It could do it even more generally than we do - for example a copy over bytes where the size 62 is a multiple of 4 can use the 4-byte word algorithm. But based on my tests, the compiler does 63 not do this (even though it does other things, like turn a memset call with a zero value 64 argument into a bzero call). 65 66 - They use a very nicely written word copy/set loop for small arrays. I spent a lot of time 67 getting the assembly just right. When we use memcpy/memset, sometimes we would optimize the 68 call by having a fast path word copy loop for small sizes. That's not necessary with this 69 implementation, since the assembly copy loop gets inlined. 70 71 - They use `rep movs` or `rep stos` for copies of 200 bytes or more. This decision benchmarks 72 poorly on every synthetic memcpy/memset benchmark I have built, and so unsurprisingly, it's 73 not what system memcpy/memset does. Most system memcpy/memset implementations end up doing 74 some SSE for medium-sized copies. However, I previously found that this decision is bad for 75 one of the memset calls in GC (see clearArray() and friends in ArrayConventions.h|cpp) - I was 76 able to make the overhead of that call virtually disappear by doing `rep stos` more 77 aggressively. The theory behind this change is that it's not just the GC that prefers a smaller 78 `rep` threshold and no SSE. I am betting that `rep`ing more is better when the heap gets 79 chaotic and the data being copied is used in interesting ways -- hence, synthetic 80 memcpy/memset benchmarks think it's bad (they don't do enough chaotic memory accesses) while 81 it's good for real-world uses. 
Also, when I previously worked on JVMs, I had found that the 82 best memcpy/memset heuristics when dealing with GC'd objects in a crazy heap were different 83 than any memcpy/memset in any system library. 84 85 This appears to be a 0.9% speed-up on PLT. I'm not sure if it's more because of the inlining or 86 the `rep`. I think it's both. I'll leave figuring out the exact tuning for future patches. 87 88 * wtf/BitVector.cpp: 89 (WTF::BitVector::setSlow): 90 (WTF::BitVector::clearAll): 91 (WTF::BitVector::resizeOutOfLine): 92 * wtf/BitVector.h: 93 (WTF::BitVector::wordCount): 94 (WTF::BitVector::OutOfLineBits::numWords const): 95 * wtf/ConcurrentBuffer.h: 96 (WTF::ConcurrentBuffer::growExact): 97 * wtf/FastBitVector.h: 98 (WTF::FastBitVectorWordOwner::operator=): 99 (WTF::FastBitVectorWordOwner::clearAll): 100 (WTF::FastBitVectorWordOwner::set): 101 * wtf/FastCopy.h: Added. 102 (WTF::fastCopy): 103 (WTF::fastCopyBytes): 104 * wtf/FastMalloc.cpp: 105 (WTF::fastZeroedMalloc): 106 (WTF::fastStrDup): 107 (WTF::tryFastZeroedMalloc): 108 * wtf/FastZeroFill.h: Added. 
109 (WTF::fastZeroFill): 110 (WTF::fastZeroFillBytes): 111 * wtf/MD5.cpp: 112 * wtf/OSAllocator.h: 113 (WTF::OSAllocator::reallocateCommitted): 114 * wtf/StringPrintStream.cpp: 115 (WTF::StringPrintStream::increaseSize): 116 * wtf/Vector.h: 117 * wtf/persistence/PersistentDecoder.cpp: 118 (WTF::Persistence::Decoder::decodeFixedLengthData): 119 * wtf/persistence/PersistentEncoder.cpp: 120 (WTF::Persistence::Encoder::encodeFixedLengthData): 121 * wtf/text/CString.cpp: 122 (WTF::CString::init): 123 (WTF::CString::copyBufferIfNeeded): 124 * wtf/text/LineBreakIteratorPoolICU.h: 125 (WTF::LineBreakIteratorPool::makeLocaleWithBreakKeyword): 126 * wtf/text/StringBuilder.cpp: 127 (WTF::StringBuilder::allocateBuffer): 128 (WTF::StringBuilder::append): 129 * wtf/text/StringConcatenate.h: 130 * wtf/text/StringImpl.h: 131 (WTF::StringImpl::copyCharacters): 132 * wtf/text/icu/UTextProvider.cpp: 133 (WTF::uTextCloneImpl): 134 * wtf/text/icu/UTextProviderLatin1.cpp: 135 (WTF::uTextLatin1Clone): 136 (WTF::openLatin1UTextProvider): 137 * wtf/threads/Signals.cpp: 138 1 139 2018-02-06 Darin Adler <darin@apple.com> 2 140 -
trunk/Source/WTF/WTF.xcodeproj/project.pbxproj
r227701 r228306 207 207 0F60F32D1DFCBD1B00416D6C /* LockedPrintStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = LockedPrintStream.cpp; sourceTree = "<group>"; }; 208 208 0F60F32E1DFCBD1B00416D6C /* LockedPrintStream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LockedPrintStream.h; sourceTree = "<group>"; }; 209 0F62A8A6202CCC14007B8623 /* FastCopy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FastCopy.h; sourceTree = "<group>"; }; 210 0F62A8A7202CCC15007B8623 /* FastZeroFill.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FastZeroFill.h; sourceTree = "<group>"; }; 209 211 0F66B2801DC97BAB004A1D3F /* ClockType.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ClockType.cpp; sourceTree = "<group>"; }; 210 212 0F66B2811DC97BAB004A1D3F /* ClockType.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ClockType.h; sourceTree = "<group>"; }; … … 865 867 0F7C5FB51D885CF20044F5E2 /* FastBitVector.cpp */, 866 868 0FD81AC4154FB22E00983E72 /* FastBitVector.h */, 869 0F62A8A6202CCC14007B8623 /* FastCopy.h */, 867 870 A8A472A1151A825A004123FF /* FastMalloc.cpp */, 868 871 A8A472A2151A825A004123FF /* FastMalloc.h */, 869 872 0F79C7C31E73511800EB34D1 /* FastTLS.h */, 873 0F62A8A7202CCC15007B8623 /* FastZeroFill.h */, 870 874 B38FD7BC168953E80065C969 /* FeatureDefines.h */, 871 875 0F9D335B165DBA73005AD387 /* FilePrintStream.cpp */, -
trunk/Source/WTF/wtf/BitVector.cpp
r225668 r228306 1 1 /* 2 * Copyright (C) 2011 Apple Inc. All rights reserved.2 * Copyright (C) 2011-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 30 30 #include <string.h> 31 31 #include <wtf/Assertions.h> 32 #include <wtf/FastCopy.h> 32 33 #include <wtf/FastMalloc.h> 34 #include <wtf/FastZeroFill.h> 33 35 #include <wtf/StdLibExtras.h> 34 36 … … 42 44 else { 43 45 OutOfLineBits* newOutOfLineBits = OutOfLineBits::create(other.size()); 44 memcpy(newOutOfLineBits->bits(), other.bits(), byteCount(other.size()));46 fastCopy(newOutOfLineBits->bits(), other.bits(), wordCount(other.size())); 45 47 newBitsOrPointer = bitwise_cast<uintptr_t>(newOutOfLineBits) >> 1; 46 48 } … … 70 72 m_bitsOrPointer = makeInlineBits(0); 71 73 else 72 memset(outOfLineBits()->bits(), 0, byteCount(size()));74 fastZeroFill(outOfLineBits()->bits(), wordCount(size())); 73 75 } 74 76 … … 94 96 // Make sure that all of the bits are zero in case we do a no-op resize. 95 97 *newOutOfLineBits->bits() = m_bitsOrPointer & ~(static_cast<uintptr_t>(1) << maxInlineBits()); 96 memset(newOutOfLineBits->bits() + 1, 0, (newNumWords - 1) * sizeof(void*));98 fastZeroFill(newOutOfLineBits->bits() + 1, newNumWords - 1); 97 99 } else { 98 100 if (numBits > size()) { 99 101 size_t oldNumWords = outOfLineBits()->numWords(); 100 memcpy(newOutOfLineBits->bits(), outOfLineBits()->bits(), oldNumWords * sizeof(void*));101 memset(newOutOfLineBits->bits() + oldNumWords, 0, (newNumWords - oldNumWords) * sizeof(void*));102 fastCopy(newOutOfLineBits->bits(), outOfLineBits()->bits(), oldNumWords); 103 fastZeroFill(newOutOfLineBits->bits() + oldNumWords, newNumWords - oldNumWords); 102 104 } else 103 memcpy(newOutOfLineBits->bits(), outOfLineBits()->bits(), newOutOfLineBits->numWords() * sizeof(void*));105 fastCopy(newOutOfLineBits->bits(), outOfLineBits()->bits(), newOutOfLineBits->numWords()); 104 106 OutOfLineBits::destroy(outOfLineBits()); 105 107 } -
trunk/Source/WTF/wtf/BitVector.h
r225524 r228306 355 355 } 356 356 357 static size_t wordCount(uintptr_t bits) 358 { 359 return (bits + bitsInPointer() - 1) / bitsInPointer(); 360 } 361 357 362 static uintptr_t makeInlineBits(uintptr_t bits) 358 363 { … … 419 424 public: 420 425 size_t numBits() const { return m_numBits; } 421 size_t numWords() const { return (m_numBits + bitsInPointer() - 1) / bitsInPointer(); }426 size_t numWords() const { return wordCount(m_numBits); } 422 427 uintptr_t* bits() { return bitwise_cast<uintptr_t*>(this + 1); } 423 428 const uintptr_t* bits() const { return bitwise_cast<const uintptr_t*>(this + 1); } -
trunk/Source/WTF/wtf/CMakeLists.txt
r228136 r228306 60 60 ExportMacros.h 61 61 FastBitVector.h 62 FastCopy.h 62 63 FastMalloc.h 63 64 FastTLS.h 65 FastZeroFill.h 64 66 FeatureDefines.h 65 67 FilePrintStream.h -
trunk/Source/WTF/wtf/ConcurrentBuffer.h
r225831 r228306 27 27 28 28 #include <wtf/Atomics.h> 29 #include <wtf/FastCopy.h> 29 30 #include <wtf/FastMalloc.h> 30 31 #include <wtf/HashFunctions.h> … … 66 67 // This allows us to do ConcurrentBuffer<std::unique_ptr<>>. 67 68 if (array) 68 memcpy(newArray->data, array->data, sizeof(T) *array->size);69 fastCopy(newArray->data, array->data, array->size); 69 70 for (size_t i = array ? array->size : 0; i < newSize; ++i) 70 71 new (newArray->data + i) T(); -
trunk/Source/WTF/wtf/FastBitVector.h
r208209 r228306 1 1 /* 2 * Copyright (C) 2012 , 2013, 2016Apple Inc. All rights reserved.2 * Copyright (C) 2012-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 28 28 #include <string.h> 29 29 #include <wtf/Atomics.h> 30 #include <wtf/FastCopy.h> 30 31 #include <wtf/FastMalloc.h> 32 #include <wtf/FastZeroFill.h> 31 33 #include <wtf/PrintStream.h> 32 34 #include <wtf/StdLibExtras.h> … … 96 98 setEqualsSlow(other); 97 99 else { 98 memcpy(m_words, other.m_words, arrayLength() * sizeof(uint32_t));100 fastCopy(m_words, other.m_words, arrayLength()); 99 101 m_numBits = other.m_numBits; 100 102 } … … 116 118 void clearAll() 117 119 { 118 memset(m_words, 0, arrayLength() * sizeof(uint32_t));120 fastZeroFill(m_words, arrayLength()); 119 121 } 120 122 … … 122 124 { 123 125 ASSERT_WITH_SECURITY_IMPLICATION(m_numBits == other.m_numBits); 124 memcpy(m_words, other.m_words, arrayLength() * sizeof(uint32_t));126 fastCopy(m_words, other.m_words, arrayLength()); 125 127 } 126 128 -
trunk/Source/WTF/wtf/FastMalloc.cpp
r220118 r228306 1 1 /* 2 2 * Copyright (c) 2005, 2007, Google Inc. All rights reserved. 3 * Copyright (C) 2005-201 7Apple Inc. All rights reserved.3 * Copyright (C) 2005-2018 Apple Inc. All rights reserved. 4 4 * Redistribution and use in source and binary forms, with or without 5 5 * modification, are permitted provided that the following conditions … … 32 32 #include <string.h> 33 33 #include <wtf/DataLog.h> 34 #include <wtf/FastCopy.h> 35 #include <wtf/FastZeroFill.h> 34 36 35 37 #if OS(WINDOWS) … … 79 81 { 80 82 void* result = fastMalloc(n); 81 memset(result, 0, n);83 fastZeroFillBytes(result, n); 82 84 return result; 83 85 } … … 87 89 size_t len = strlen(src) + 1; 88 90 char* dup = static_cast<char*>(fastMalloc(len)); 89 memcpy(dup, src, len);91 fastCopy(dup, src, len); 90 92 return dup; 91 93 } … … 96 98 if (!tryFastMalloc(n).getValue(result)) 97 99 return 0; 98 memset(result, 0, n);100 fastZeroFillBytes(result, n); 99 101 return result; 100 102 } -
trunk/Source/WTF/wtf/OSAllocator.h
r227951 r228306 1 1 /* 2 * Copyright (C) 2010 Apple Inc. All rights reserved.2 * Copyright (C) 2010-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 28 28 29 29 #include <algorithm> 30 #include <wtf/FastCopy.h> 30 31 #include <wtf/VMTags.h> 31 32 … … 91 92 { 92 93 void* newBase = reserveAndCommit(newSize, usage, writable, executable); 93 memcpy(newBase, oldBase, std::min(oldSize, newSize));94 fastCopyBytes(newBase, oldBase, std::min(oldSize, newSize)); 94 95 decommitAndRelease(oldBase, oldSize); 95 96 return static_cast<T*>(newBase); -
trunk/Source/WTF/wtf/StringPrintStream.cpp
r225618 r228306 29 29 #include <stdarg.h> 30 30 #include <stdio.h> 31 #include <wtf/FastCopy.h> 31 32 #include <wtf/FastMalloc.h> 32 33 … … 120 121 // we can't realloc the inline buffer. 121 122 char* newBuffer = static_cast<char*>(fastMalloc(m_size)); 122 memcpy(newBuffer, m_buffer, m_next + 1);123 fastCopy(newBuffer, m_buffer, m_next + 1); 123 124 if (m_buffer != m_inlineBuffer) 124 125 fastFree(m_buffer); -
trunk/Source/WTF/wtf/Vector.h
r226068 r228306 28 28 #include <utility> 29 29 #include <wtf/CheckedArithmetic.h> 30 #include <wtf/FastCopy.h> 30 31 #include <wtf/FastMalloc.h> 32 #include <wtf/FastZeroFill.h> 31 33 #include <wtf/Forward.h> 32 34 #include <wtf/MallocPtr.h> … … 87 89 static void initialize(T* begin, T* end) 88 90 { 89 memset(begin, 0, reinterpret_cast<char*>(end) - reinterpret_cast<char*>(begin));91 fastZeroFill(begin, end - begin); 90 92 } 91 93 }; … … 127 129 static void move(const T* src, const T* srcEnd, T* dst) 128 130 { 129 memcpy(dst, src, reinterpret_cast<const char*>(srcEnd) - reinterpret_cast<const char*>(src));131 fastCopy(dst, src, srcEnd - src); 130 132 } 131 133 static void moveOverlapping(const T* src, const T* srcEnd, T* dst) -
trunk/Source/WTF/wtf/persistence/PersistentDecoder.cpp
r220574 r228306 53 53 return false; 54 54 55 memcpy(data, m_bufferPosition, size);55 fastCopy(data, m_bufferPosition, size); 56 56 m_bufferPosition += size; 57 57 -
trunk/Source/WTF/wtf/persistence/PersistentEncoder.cpp
r220574 r228306 59 59 60 60 uint8_t* buffer = grow(size); 61 memcpy(buffer, data, size);61 fastCopy(buffer, data, size); 62 62 } 63 63 -
trunk/Source/WTF/wtf/text/CString.cpp
r225463 r228306 1 1 /* 2 * Copyright (C) 2003-201 7Apple Inc. All rights reserved.2 * Copyright (C) 2003-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 29 29 30 30 #include <string.h> 31 #include <wtf/FastCopy.h> 31 32 #include <wtf/text/StringHasher.h> 32 33 #include <wtf/text/StringMalloc.h> … … 67 68 68 69 m_buffer = CStringBuffer::createUninitialized(length); 69 memcpy(m_buffer->mutableData(), str, length);70 fastCopy(m_buffer->mutableData(), str, length); 70 71 m_buffer->mutableData()[length] = '\0'; 71 72 } … … 97 98 size_t length = buffer->length(); 98 99 m_buffer = CStringBuffer::createUninitialized(length); 99 memcpy(m_buffer->mutableData(), buffer->data(), length + 1);100 fastCopy(m_buffer->mutableData(), buffer->data(), length + 1); 100 101 } 101 102 -
trunk/Source/WTF/wtf/text/LineBreakIteratorPoolICU.h
r218594 r228306 1 1 /* 2 * Copyright (C) 2011 Apple Inc. All Rights Reserved.2 * Copyright (C) 2011-2018 Apple Inc. All Rights Reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 27 27 28 28 #include <unicode/uloc.h> 29 #include <wtf/FastCopy.h> 30 #include <wtf/FastZeroFill.h> 29 31 #include <wtf/HashMap.h> 30 32 #include <wtf/NeverDestroyed.h> … … 52 54 return locale; 53 55 Vector<char> scratchBuffer(utf8Locale.length() + 11, 0); 54 memcpy(scratchBuffer.data(), utf8Locale.data(), utf8Locale.length());56 fastCopy(scratchBuffer.data(), utf8Locale.data(), utf8Locale.length()); 55 57 56 58 const char* keywordValue = nullptr; … … 76 78 if (status == U_BUFFER_OVERFLOW_ERROR) { 77 79 scratchBuffer.grow(lengthNeeded + 1); 78 memset(scratchBuffer.data() + utf8Locale.length(), 0, scratchBuffer.size() - utf8Locale.length());80 fastZeroFill(scratchBuffer.data() + utf8Locale.length(), scratchBuffer.size() - utf8Locale.length()); 79 81 status = U_ZERO_ERROR; 80 82 int32_t lengthNeeded2 = uloc_setKeywordValue("lb", keywordValue, scratchBuffer.data(), scratchBuffer.size(), &status); -
trunk/Source/WTF/wtf/text/StringBuilder.cpp
r221330 r228306 100 100 // Copy the existing data into a new buffer, set result to point to the end of the existing data. 101 101 auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters8); 102 memcpy(m_bufferCharacters8, currentCharacters, static_cast<size_t>(m_length) * sizeof(LChar)); // This can't overflow.102 fastCopy(m_bufferCharacters8, currentCharacters, m_length); 103 103 104 104 // Update the builder state. … … 115 115 // Copy the existing data into a new buffer, set result to point to the end of the existing data. 116 116 auto buffer = StringImpl::createUninitialized(requiredLength, m_bufferCharacters16); 117 memcpy(m_bufferCharacters16, currentCharacters, static_cast<size_t>(m_length) * sizeof(UChar)); // This can't overflow.117 fastCopy(m_bufferCharacters16, currentCharacters, m_length); 118 118 119 119 // Update the builder state. … … 277 277 } 278 278 279 memcpy(m_bufferCharacters16 + m_length, characters, static_cast<size_t>(length) * sizeof(UChar));279 fastCopy(m_bufferCharacters16 + m_length, characters, length); 280 280 m_length = requiredLength; 281 281 } else 282 memcpy(appendUninitialized<UChar>(length), characters, static_cast<size_t>(length) * sizeof(UChar));282 fastCopy(appendUninitialized<UChar>(length), characters, length); 283 283 ASSERT(m_buffer->length() >= m_length); 284 284 } … … 292 292 if (m_is8Bit) { 293 293 LChar* dest = appendUninitialized<LChar>(length); 294 if (length > 8) 295 memcpy(dest, characters, static_cast<size_t>(length) * sizeof(LChar)); 296 else { 297 const LChar* end = characters + length; 298 while (characters < end) 299 *(dest++) = *(characters++); 300 } 294 fastCopy(dest, characters, length); 301 295 } else { 302 296 UChar* dest = appendUninitialized<UChar>(length); -
trunk/Source/WTF/wtf/text/StringConcatenate.h
r225824 r228306 28 28 29 29 #include <string.h> 30 #include <wtf/FastCopy.h> 30 31 31 32 #ifndef AtomicString_h … … 158 159 void writeTo(UChar* destination) const 159 160 { 160 memcpy(destination, m_characters, m_length * sizeof(UChar));161 fastCopy(destination, m_characters, m_length); 161 162 } 162 163 -
trunk/Source/WTF/wtf/text/StringImpl.h
r227691 r228306 1067 1067 return; 1068 1068 } 1069 memcpy(destination, source, numCharacters * sizeof(CharacterType));1069 fastCopy(destination, source, numCharacters); 1070 1070 } 1071 1071 -
trunk/Source/WTF/wtf/text/icu/UTextProvider.cpp
r203038 r228306 29 29 #include <algorithm> 30 30 #include <string.h> 31 #include <wtf/FastCopy.h> 31 32 32 33 namespace WTF { … … 56 57 int32_t flags = destination->flags; 57 58 int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct); 58 memcpy(destination, source, sizeToCopy);59 fastCopyBytes(destination, source, sizeToCopy); 59 60 destination->pExtra = extraNew; 60 61 destination->flags = flags; 61 memcpy(destination->pExtra, source->pExtra, extraSize);62 fastCopyBytes(destination->pExtra, source->pExtra, extraSize); 62 63 fixPointer(source, destination, destination->context); 63 64 fixPointer(source, destination, destination->p); -
trunk/Source/WTF/wtf/text/icu/UTextProviderLatin1.cpp
r225117 r228306 28 28 29 29 #include "UTextProvider.h" 30 #include <wtf/FastZeroFill.h> 30 31 #include <wtf/text/StringImpl.h> 31 32 … … 83 84 result->pFuncs = &uTextLatin1Funcs; 84 85 result->chunkContents = (UChar*)result->pExtra; 85 memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) *UTextWithBufferInlineCapacity);86 fastZeroFill(const_cast<UChar*>(result->chunkContents), UTextWithBufferInlineCapacity); 86 87 87 88 return result; … … 229 230 text->pFuncs = &uTextLatin1Funcs; 230 231 text->chunkContents = (UChar*)text->pExtra; 231 memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) *UTextWithBufferInlineCapacity);232 fastZeroFill(const_cast<UChar*>(text->chunkContents), UTextWithBufferInlineCapacity); 232 233 233 234 return text; -
trunk/Source/WTF/wtf/threads/Signals.cpp
r219760 r228306 173 173 RELEASE_ASSERT(signal != Signal::Unknown); 174 174 175 memcpy(outState, inState, inStateCount * sizeof(inState[0]));175 fastCopy(outState, inState, inStateCount); 176 176 *outStateCount = inStateCount; 177 177 -
trunk/Source/bmalloc/ChangeLog
r228108 r228306 1 2018-02-08 Filip Pizlo <fpizlo@apple.com> 2 3 Experiment with alternative implementation of memcpy/memset 4 https://bugs.webkit.org/show_bug.cgi?id=182563 5 6 Reviewed by Michael Saboff and Mark Lam. 7 8 Add a faster x86_64-specific implementation of memcpy and memset. Ideally, this would just be 9 implemented in WTF, but we have to copy it into bmalloc since bmalloc sits below WTF on the 10 stack. 11 12 * bmalloc/Algorithm.h: 13 (bmalloc::fastCopy): 14 (bmalloc::fastZeroFill): 15 * bmalloc/Allocator.cpp: 16 (bmalloc::Allocator::reallocate): 17 * bmalloc/Bits.h: 18 (bmalloc::BitsWordOwner::operator=): 19 (bmalloc::BitsWordOwner::clearAll): 20 (bmalloc::BitsWordOwner::set): 21 * bmalloc/IsoPageInlines.h: 22 (bmalloc::IsoPage<Config>::IsoPage): 23 * bmalloc/Vector.h: 24 (bmalloc::Vector<T>::reallocateBuffer): 25 1 26 2018-02-05 JF Bastien <jfbastien@apple.com> 2 27 -
trunk/Source/bmalloc/bmalloc/Algorithm.h
r225701 r228306 181 181 } 182 182 183 template<typename T> 184 void fastCopy(T* dst, T* src, size_t length) 185 { 186 #if BCPU(X86_64) 187 uint64_t tmp = 0; 188 size_t count = length * sizeof(T); 189 if (!(sizeof(T) % sizeof(uint64_t))) { 190 asm volatile ( 191 "cmpq $200, %%rcx\n\t" 192 "jb 1f\n\t" 193 "shrq $3, %%rcx\n\t" 194 "rep movsq\n\t" 195 "jmp 2f\n\t" 196 "3:\n\t" 197 "movq (%%rsi, %%rcx), %%rax\n\t" 198 "movq %%rax, (%%rdi, %%rcx)\n\t" 199 "1:\n\t" 200 "subq $8, %%rcx\n\t" 201 "jae 3b\n\t" 202 "2:\n\t" 203 : "+D"(dst), "+S"(src), "+c"(count), "+a"(tmp) 204 : 205 : "memory" 206 ); 207 return; 208 } 209 if (!(sizeof(T) % sizeof(uint32_t))) { 210 asm volatile ( 211 "cmpq $200, %%rcx\n\t" 212 "jb 1f\n\t" 213 "shrq $2, %%rcx\n\t" 214 "rep movsl\n\t" 215 "jmp 2f\n\t" 216 "3:\n\t" 217 "movq (%%rsi, %%rcx), %%rax\n\t" 218 "movq %%rax, (%%rdi, %%rcx)\n\t" 219 "1:\n\t" 220 "subq $8, %%rcx\n\t" 221 "jae 3b\n\t" 222 "cmpq $-8, %%rcx\n\t" 223 "je 2f\n\t" 224 "addq $4, %%rcx\n\t" // FIXME: This isn't really a loop. 
https://bugs.webkit.org/show_bug.cgi?id=182617 225 "4:\n\t" 226 "movl (%%rsi, %%rcx), %%eax\n\t" 227 "movl %%eax, (%%rdi, %%rcx)\n\t" 228 "subq $4, %%rcx\n\t" 229 "jae 4b\n\t" 230 "2:\n\t" 231 : "+D"(dst), "+S"(src), "+c"(count), "+a"(tmp) 232 : 233 : "memory" 234 ); 235 return; 236 } 237 if (!(sizeof(T) % sizeof(uint16_t))) { 238 asm volatile ( 239 "cmpq $200, %%rcx\n\t" 240 "jb 1f\n\t" 241 "shrq $1, %%rcx\n\t" 242 "rep movsw\n\t" 243 "jmp 2f\n\t" 244 "3:\n\t" 245 "movq (%%rsi, %%rcx), %%rax\n\t" 246 "movq %%rax, (%%rdi, %%rcx)\n\t" 247 "1:\n\t" 248 "subq $8, %%rcx\n\t" 249 "jae 3b\n\t" 250 "cmpq $-8, %%rcx\n\t" 251 "je 2f\n\t" 252 "addq $6, %%rcx\n\t" 253 "4:\n\t" 254 "movw (%%rsi, %%rcx), %%ax\n\t" 255 "movw %%ax, (%%rdi, %%rcx)\n\t" 256 "subq $2, %%rcx\n\t" 257 "jae 4b\n\t" 258 "2:\n\t" 259 : "+D"(dst), "+S"(src), "+c"(count), "+a"(tmp) 260 : 261 : "memory" 262 ); 263 return; 264 } 265 asm volatile ( 266 "cmpq $200, %%rcx\n\t" 267 "jb 1f\n\t" 268 "rep movsb\n\t" 269 "jmp 2f\n\t" 270 "3:\n\t" 271 "movq (%%rsi, %%rcx), %%rax\n\t" 272 "movq %%rax, (%%rdi, %%rcx)\n\t" 273 "1:\n\t" 274 "subq $8, %%rcx\n\t" 275 "jae 3b\n\t" 276 "cmpq $-8, %%rcx\n\t" 277 "je 2f\n\t" 278 "addq $7, %%rcx\n\t" 279 "4:\n\t" 280 "movb (%%rsi, %%rcx), %%al\n\t" 281 "movb %%al, (%%rdi, %%rcx)\n\t" 282 "subq $1, %%rcx\n\t" 283 "jae 4b\n\t" 284 "2:\n\t" 285 : "+D"(dst), "+S"(src), "+c"(count), "+a"(tmp) 286 : 287 : "memory" 288 ); 289 #else 290 memcpy(dst, src, length * sizeof(T)); 291 #endif 292 } 293 294 template<typename T> 295 void fastZeroFill(T* dst, size_t length) 296 { 297 #if BCPU(X86_64) 298 uint64_t zero = 0; 299 size_t count = length * sizeof(T); 300 if (!(sizeof(T) % sizeof(uint64_t))) { 301 asm volatile ( 302 "cmpq $200, %%rcx\n\t" 303 "jb 1f\n\t" 304 "shrq $3, %%rcx\n\t" 305 "rep stosq\n\t" 306 "jmp 2f\n\t" 307 "3:\n\t" 308 "movq %%rax, (%%rdi, %%rcx)\n\t" 309 "1:\n\t" 310 "subq $8, %%rcx\n\t" 311 "jae 3b\n\t" 312 "2:\n\t" 313 : "+D"(dst), "+c"(count) 314 : "a"(zero) 315 : 
"memory" 316 ); 317 return; 318 } 319 if (!(sizeof(T) % sizeof(uint32_t))) { 320 asm volatile ( 321 "cmpq $200, %%rcx\n\t" 322 "jb 1f\n\t" 323 "shrq $2, %%rcx\n\t" 324 "rep stosl\n\t" 325 "jmp 2f\n\t" 326 "3:\n\t" 327 "movq %%rax, (%%rdi, %%rcx)\n\t" 328 "1:\n\t" 329 "subq $8, %%rcx\n\t" 330 "jae 3b\n\t" 331 "cmpq $-8, %%rcx\n\t" 332 "je 2f\n\t" 333 "addq $4, %%rcx\n\t" // FIXME: This isn't really a loop. https://bugs.webkit.org/show_bug.cgi?id=182617 334 "4:\n\t" 335 "movl %%eax, (%%rdi, %%rcx)\n\t" 336 "subq $4, %%rcx\n\t" 337 "jae 4b\n\t" 338 "2:\n\t" 339 : "+D"(dst), "+c"(count) 340 : "a"(zero) 341 : "memory" 342 ); 343 return; 344 } 345 if (!(sizeof(T) % sizeof(uint16_t))) { 346 asm volatile ( 347 "cmpq $200, %%rcx\n\t" 348 "jb 1f\n\t" 349 "shrq $1, %%rcx\n\t" 350 "rep stosw\n\t" 351 "jmp 2f\n\t" 352 "3:\n\t" 353 "movq %%rax, (%%rdi, %%rcx)\n\t" 354 "1:\n\t" 355 "subq $8, %%rcx\n\t" 356 "jae 3b\n\t" 357 "cmpq $-8, %%rcx\n\t" 358 "je 2f\n\t" 359 "addq $6, %%rcx\n\t" 360 "4:\n\t" 361 "movw %%ax, (%%rdi, %%rcx)\n\t" 362 "subq $2, %%rcx\n\t" 363 "jae 4b\n\t" 364 "2:\n\t" 365 : "+D"(dst), "+c"(count) 366 : "a"(zero) 367 : "memory" 368 ); 369 return; 370 } 371 asm volatile ( 372 "cmpq $200, %%rcx\n\t" 373 "jb 1f\n\t" 374 "rep stosb\n\t" 375 "jmp 2f\n\t" 376 "3:\n\t" 377 "movq %%rax, (%%rdi, %%rcx)\n\t" 378 "1:\n\t" 379 "subq $8, %%rcx\n\t" 380 "jae 3b\n\t" 381 "cmpq $-8, %%rcx\n\t" 382 "je 2f\n\t" 383 "addq $7, %%rcx\n\t" 384 "4:\n\t" 385 "movb %%al, (%%rdi, %%rcx)\n\t" 386 "sub $1, %%rcx\n\t" 387 "jae 4b\n\t" 388 "2:\n\t" 389 : "+D"(dst), "+c"(count) 390 : "a"(zero) 391 : "memory" 392 ); 393 #else 394 memset(dst, 0, length * sizeof(T)); 395 #endif 396 } 397 183 398 } // namespace bmalloc 184 399 -
trunk/Source/bmalloc/bmalloc/Allocator.cpp
r220352 r228306 126 126 void* result = allocate(newSize); 127 127 size_t copySize = std::min(oldSize, newSize); 128 memcpy(result, object, copySize);128 fastCopy(static_cast<char*>(result), static_cast<char*>(object), copySize); 129 129 m_deallocator.deallocate(object); 130 130 return result; -
trunk/Source/bmalloc/bmalloc/Bits.h
r224537 r228306 81 81 BitsWordOwner& operator=(const BitsWordOwner& other) 82 82 { 83 memcpy(m_words, other.m_words, arrayLength() * sizeof(uint32_t));83 fastCopy(m_words, other.m_words, arrayLength()); 84 84 return *this; 85 85 } … … 92 92 void clearAll() 93 93 { 94 memset(m_words, 0, arrayLength() * sizeof(uint32_t));94 fastZeroFill(m_words, arrayLength()); 95 95 } 96 96 97 97 void set(const BitsWordOwner& other) 98 98 { 99 memcpy(m_words, other.m_words, arrayLength() * sizeof(uint32_t));99 fastCopy(m_words, other.m_words, arrayLength()); 100 100 } 101 101 -
trunk/Source/bmalloc/bmalloc/IsoPageInlines.h
r225125 r228306 1 1 /* 2 * Copyright (C) 2017 Apple Inc. All rights reserved.2 * Copyright (C) 2017-2018 Apple Inc. All rights reserved. 3 3 * 4 4 * Redistribution and use in source and binary forms, with or without … … 48 48 , m_index(index) 49 49 { 50 memset(m_allocBits, 0, sizeof(m_allocBits));50 fastZeroFill(m_allocBits, bitsArrayLength(numObjects)); 51 51 } 52 52 -
trunk/Source/bmalloc/bmalloc/Vector.h
r220352 r228306 204 204 T* newBuffer = vmSize ? static_cast<T*>(vmAllocate(vmSize)) : nullptr; 205 205 if (m_buffer) { 206 std::memcpy(newBuffer, m_buffer, m_size * sizeof(T));206 fastCopy(newBuffer, m_buffer, m_size); 207 207 vmDeallocate(m_buffer, bmalloc::vmSize(m_capacity * sizeof(T))); 208 208 }
Note: See TracChangeset for help on using the changeset viewer.