Context Navigation

← Previous Changeset
Next Changeset →

Changeset 268539 in webkit

Timestamp:

Oct 15, 2020, 11:21:25 AM (5 years ago)

Author:

Chris Dumez

Message:

Vectorize StereoPanner's panToTargetValue()
https://bugs.webkit.org/show_bug.cgi?id=217765

Reviewed by Geoffrey Garen.

Vectorize StereoPanner's panToTargetValue().

No new tests, no Web-facing behavior change.

platform/audio/StereoPanner.cpp:

(WebCore::StereoPanner::panToTargetValue):

platform/audio/VectorMath.cpp:

(WebCore::VectorMath::multiplyByScalar):
(WebCore::VectorMath::multiplyByScalarThenAddToOutput):
(WebCore::VectorMath::multiplyByScalarThenAddToVector):

platform/audio/VectorMath.h:

Location:

trunk/Source/WebCore

Files:

4 edited

ChangeLog (modified) (1 diff)
platform/audio/StereoPanner.cpp (modified) (4 diffs)
platform/audio/VectorMath.cpp (modified) (12 diffs)
platform/audio/VectorMath.h (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

trunk/Source/WebCore/ChangeLog

-              r268522
+              r268539
+2020-10-15  Chris Dumez  <cdumez@apple.com>
+        Vectorize StereoPanner's panToTargetValue()
+        https://bugs.webkit.org/show_bug.cgi?id=217765
+        Reviewed by Geoffrey Garen.
+        Vectorize StereoPanner's panToTargetValue().
+        No new tests, no Web-facing behavior change.
+        * platform/audio/StereoPanner.cpp:
+        (WebCore::StereoPanner::panToTargetValue):
+        * platform/audio/VectorMath.cpp:
+        (WebCore::VectorMath::multiplyByScalar):
+        (WebCore::VectorMath::multiplyByScalarThenAddToOutput):
+        (WebCore::VectorMath::multiplyByScalarThenAddToVector):
+        * platform/audio/VectorMath.h:
 2020-10-15  Chris Lord  <clord@igalia.com>

trunk/Source/WebCore/platform/audio/StereoPanner.cpp

-              r265962
+              r268539
 #if ENABLE(WEB_AUDIO)
+#include "VectorMath.h"
 #include <wtf/MathExtras.h>
 …
     if (!sourceL || !sourceR || !destinationL || !destinationR)
         return;
     float targetPan = clampTo(panValue, -1.0, 1.0);
-    int n = framesToProcess;
     if (numberOfInputChannels == 1) {
 …
         double gainR = sin(panRadian);
-        while (n--) {
-            float inputL = *sourceL++;
-            *destinationL++ = static_cast<float>(inputL * gainL);
-            *destinationR++ = static_cast<float>(inputL * gainR);
-        }
+        VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
+        VectorMath::multiplyByScalar(sourceL, gainR, destinationR, framesToProcess);
     } else {
         double panRadian = (targetPan <= 0 ? targetPan + 1 : targetPan) * piOverTwoDouble;
 …
         double gainL = cos(panRadian);
         double gainR = sin(panRadian);
-        while (n--) {
-            float inputL = *sourceL++;
-            float inputR = *sourceR++;
-            if (targetPan <= 0) {
-                *destinationL++ = static_cast<float>(inputL + inputR * gainL);
-                *destinationR++ = static_cast<float>(inputR * gainR);
-            } else {
-                *destinationL++ = static_cast<float>(inputL * gainL);
-                *destinationR++ = static_cast<float>(inputR + inputL * gainR);
-            }
+        if (targetPan <= 0) {
+            VectorMath::multiplyByScalarThenAddToVector(sourceR, gainL, sourceL, destinationL, framesToProcess);
+            VectorMath::multiplyByScalar(sourceR, gainR, destinationR, framesToProcess);
+        } else {
+            VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
+            VectorMath::multiplyByScalarThenAddToVector(sourceL, gainR, sourceR, destinationR, framesToProcess);
+        }
+    }

trunk/Source/WebCore/platform/audio/VectorMath.cpp

-              r268506
+              r268539
 // On the Mac we use the highly optimized versions in Accelerate.framework
 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+{
     vDSP_vsmul(inputVector, 1, &scale, outputVector, 1, numberOfElementsToProcess);
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
+{
+    vDSP_vsmul(inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess);
+}
 …
+}
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+{
+    vDSP_vsma(inputVector, 1, &scale, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
+{
+    vDSP_vsma(inputVector, 1, &scalar, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
+}
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
+{
+    vDSP_vsma(inputVector1, 1, &scalar, inputVector2, 1, outputVector, 1, numberOfElementsToProcess);
+}
 …
+}
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+// Portable fallback for the vsma operation:
+//     outputVector[n] = inputVector1[n] * scalar + inputVector2[n]
+// The product is written with multiplyByScalar(), which overwrites outputVector.
+// multiplyByScalarThenAddToOutput() must not be used here: it accumulates into
+// outputVector, so whatever the buffer held before the call would be added to
+// the result.
+// NOTE(review): outputVector is used as scratch space for the product, so it
+// presumably must not alias inputVector2 - confirm against callers.
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
+{
+    // Step 1: outputVector = inputVector1 * scalar (overwrite, no accumulation).
+    multiplyByScalar(inputVector1, scalar, outputVector, numberOfElementsToProcess);
+    // Step 2: outputVector = outputVector + inputVector2.
+    add(outputVector, inputVector2, outputVector, numberOfElementsToProcess);
+}
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
+{
     size_t n = numberOfElementsToProcess;
 …
     // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
     while (!is16ByteAligned(inputVector) && n) {
         *outputVector += scale * *inputVector;
+        *outputVector += scalar * *inputVector;
         inputVector++;
         outputVector++;
 …
     __m128 dest;
     __m128 temp;
     __m128 mScale = _mm_set_ps1(scale);
+    __m128 mScale = _mm_set_ps1(scalar);
     bool destAligned = is16ByteAligned(outputVector);
 …
     const float* endP = outputVector + n - tailFrames;
     float32x4_t k = vdupq_n_f32(scale);
+    float32x4_t k = vdupq_n_f32(scalar);
     while (outputVector < endP) {
         float32x4_t source = vld1q_f32(inputVector);
 …
 #endif
     while (n--) {
         *outputVector += *inputVector * scale;
+        *outputVector += *inputVector * scalar;
         ++inputVector;
         ++outputVector;
 …
+}
 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
+{
     size_t n = numberOfElementsToProcess;
 …
     // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
     while (!is16ByteAligned(inputVector) && n) {
         *outputVector = scale * *inputVector;
+        *outputVector = scalar * *inputVector;
         inputVector++;
         outputVector++;
 …
     // Now the inputVector address is aligned and start to apply SSE.
     size_t group = n / 4;
     __m128 mScale = _mm_set_ps1(scale);
+    __m128 mScale = _mm_set_ps1(scalar);
     __m128* pSource;
     __m128* pDest;
 …
     while (outputVector < endP) {
         float32x4_t source = vld1q_f32(inputVector);
         vst1q_f32(outputVector, vmulq_n_f32(source, scale));
+        vst1q_f32(outputVector, vmulq_n_f32(source, scalar));
         inputVector += 4;
 …
 #endif
     while (n--) {
         *outputVector = scale * *inputVector;
+        *outputVector = scalar * *inputVector;
         ++inputVector;
         ++outputVector;

trunk/Source/WebCore/platform/audio/VectorMath.h

-              r268506
+              r268539
 namespace VectorMath {
 // Multiplies inputVector by scalar then adds the result to outputVector (vsma).
+// Multiplies inputVector by scalar then adds the result to outputVector (simplified vsma).
 // for (n = 0; n < numberOfElementsToProcess; ++n)
+//     outputVector[n] += inputVector[n] * scale;
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
+//     outputVector[n] += inputVector[n] * scalar;
+void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
+// Adds a vector inputVector2 to the product of a scalar value and a single-precision vector inputVector1 (vsma).
+// for (n = 0; n < numberOfElementsToProcess; ++n)
+//     outputVector[n] = inputVector1[n] * scalar + inputVector2[n];
+void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
 // Multiplies the sum of two vectors by a scalar value (vasm).
 void addVectorsThenMultiplyByScalar(const float* inputVector1, const float* inputVector2, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
+void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 void addScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
 void add(const float* inputVector1, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);

Note: See TracChangeset for help on using the changeset viewer.