Changeset 268539 in webkit
- Timestamp:
- Oct 15, 2020 11:21:25 AM (3 years ago)
- Location:
- trunk/Source/WebCore
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r268522 r268539 1 2020-10-15 Chris Dumez <cdumez@apple.com> 2 3 Vectorize StereoPanner's panToTargetValue() 4 https://bugs.webkit.org/show_bug.cgi?id=217765 5 6 Reviewed by Geoffrey Garen. 7 8 Vectorize StereoPanner's panToTargetValue(). 9 10 No new tests, no Web-facing behavior change. 11 12 * platform/audio/StereoPanner.cpp: 13 (WebCore::StereoPanner::panToTargetValue): 14 * platform/audio/VectorMath.cpp: 15 (WebCore::VectorMath::multiplyByScalar): 16 (WebCore::VectorMath::multiplyByScalarThenAddToOutput): 17 (WebCore::VectorMath::multiplyByScalarThenAddToVector): 18 * platform/audio/VectorMath.h: 19 1 20 2020-10-15 Chris Lord <clord@igalia.com> 2 21 -
trunk/Source/WebCore/platform/audio/StereoPanner.cpp
r265962 r268539 30 30 #if ENABLE(WEB_AUDIO) 31 31 32 #include "VectorMath.h" 32 33 #include <wtf/MathExtras.h> 33 34 … … 117 118 if (!sourceL || !sourceR || !destinationL || !destinationR) 118 119 return; 119 120 120 121 float targetPan = clampTo(panValue, -1.0, 1.0); 121 122 int n = framesToProcess;123 122 124 123 if (numberOfInputChannels == 1) { … … 128 127 double gainR = sin(panRadian); 129 128 130 while (n--) { 131 float inputL = *sourceL++; 132 *destinationL++ = static_cast<float>(inputL * gainL); 133 *destinationR++ = static_cast<float>(inputL * gainR); 134 } 129 VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess); 130 VectorMath::multiplyByScalar(sourceL, gainR, destinationR, framesToProcess); 135 131 } else { 136 132 double panRadian = (targetPan <= 0 ? targetPan + 1 : targetPan) * piOverTwoDouble; … … 138 134 double gainL = cos(panRadian); 139 135 double gainR = sin(panRadian); 140 141 while (n--) { 142 float inputL = *sourceL++; 143 float inputR = *sourceR++; 144 if (targetPan <= 0) { 145 *destinationL++ = static_cast<float>(inputL + inputR * gainL); 146 *destinationR++ = static_cast<float>(inputR * gainR); 147 } else { 148 *destinationL++ = static_cast<float>(inputL * gainL); 149 *destinationR++ = static_cast<float>(inputR + inputL * gainR); 150 } 136 137 if (targetPan <= 0) { 138 VectorMath::multiplyByScalarThenAddToVector(sourceR, gainL, sourceL, destinationL, framesToProcess); 139 VectorMath::multiplyByScalar(sourceR, gainR, destinationR, framesToProcess); 140 } else { 141 VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess); 142 VectorMath::multiplyByScalarThenAddToVector(sourceL, gainR, sourceR, destinationR, framesToProcess); 151 143 } 152 144 } -
trunk/Source/WebCore/platform/audio/VectorMath.cpp
r268506 r268539 53 53 // On the Mac we use the highly optimized versions in Accelerate.framework 54 54 55 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess) 56 { 57 vDSP_vsmul(inputVector, 1, &scale, outputVector, 1, numberOfElementsToProcess); 55 void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess) 56 { 57 vDSP_vsmul(inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess); 58 58 } 59 59 … … 87 87 } 88 88 89 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess) 90 { 91 vDSP_vsma(inputVector, 1, &scale, outputVector, 1, outputVector, 1, numberOfElementsToProcess); 89 void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess) 90 { 91 vDSP_vsma(inputVector, 1, &scalar, outputVector, 1, outputVector, 1, numberOfElementsToProcess); 92 } 93 94 void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess) 95 { 96 vDSP_vsma(inputVector1, 1, &scalar, inputVector2, 1, outputVector, 1, numberOfElementsToProcess); 92 97 } 93 98 … … 129 134 } 130 135 131 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess) 136 void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess) 137 { 138 multiplyByScalarThenAddToOutput(inputVector1, scalar, outputVector, numberOfElementsToProcess); 139 add(outputVector, inputVector2, outputVector, numberOfElementsToProcess); 140 } 141 142 void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess) 132 143 { 133 144 size_t n = 
numberOfElementsToProcess; … … 136 147 // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 137 148 while (!is16ByteAligned(inputVector) && n) { 138 *outputVector += scale * *inputVector; 149 *outputVector += scalar * *inputVector; 139 150 inputVector++; 140 151 outputVector++; … … 149 160 __m128 dest; 150 161 __m128 temp; 151 __m128 mScale = _mm_set_ps1(scale); 162 __m128 mScale = _mm_set_ps1(scalar); 152 163 153 164 bool destAligned = is16ByteAligned(outputVector); … … 175 186 const float* endP = outputVector + n - tailFrames; 176 187 177 float32x4_t k = vdupq_n_f32(scale); 188 float32x4_t k = vdupq_n_f32(scalar); 178 189 while (outputVector < endP) { 179 190 float32x4_t source = vld1q_f32(inputVector); … … 189 200 #endif 190 201 while (n--) { 191 *outputVector += *inputVector * scale; 202 *outputVector += *inputVector * scalar; 192 203 ++inputVector; 193 204 ++outputVector; … … 195 206 } 196 207 197 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess) 208 void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess) 198 209 { 199 210 size_t n = numberOfElementsToProcess; … … 202 213 // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately. 203 214 while (!is16ByteAligned(inputVector) && n) { 204 *outputVector = scale * *inputVector; 215 *outputVector = scalar * *inputVector; 205 216 inputVector++; 206 217 outputVector++; … … 210 221 // Now the inputVector address is aligned and start to apply SSE. 
211 222 size_t group = n / 4; 212 __m128 mScale = _mm_set_ps1(scale); 223 __m128 mScale = _mm_set_ps1(scalar); 213 224 __m128* pSource; 214 225 __m128* pDest; … … 244 255 while (outputVector < endP) { 245 256 float32x4_t source = vld1q_f32(inputVector); 246 vst1q_f32(outputVector, vmulq_n_f32(source, scale)); 257 vst1q_f32(outputVector, vmulq_n_f32(source, scalar)); 247 258 248 259 inputVector += 4; … … 252 263 #endif 253 264 while (n--) { 254 *outputVector = scale * *inputVector; 265 *outputVector = scalar * *inputVector; 255 266 ++inputVector; 256 267 ++outputVector; -
trunk/Source/WebCore/platform/audio/VectorMath.h
r268506 r268539 32 32 namespace VectorMath { 33 33 34 // Multiples inputVector by scalar then adds the result to outputVector (vsma). 34 // Multiples inputVector by scalar then adds the result to outputVector (simplified vsma). 35 35 // for (n = 0; n < numberOfElementsToProcess; ++n) 36 // outputVector[n] += inputVector[n] * scale; 37 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess); 36 // outputVector[n] += inputVector[n] * scalar; 37 void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess); 38 39 // Adds a vector inputVector2 to the product of a scalar value and a single-precision vector inputVector1 (vsma). 40 // for (n = 0; n < numberOfElementsToProcess; ++n) 41 // outputVector[n] = inputVector1[n] * scalar + inputVector2[n]; 42 void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess); 38 43 39 44 // Multiplies the sum of two vectors by a scalar value (vasm). 40 45 void addVectorsThenMultiplyByScalar(const float* inputVector1, const float* inputVector2, float scalar, float* outputVector, size_t numberOfElementsToProcess); 41 46 42 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess); 47 void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess); 43 48 void addScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess); 44 49 void add(const float* inputVector1, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
Note: See TracChangeset
for help on using the changeset viewer.