Changeset 268539 in webkit


Ignore:
Timestamp:
Oct 15, 2020 11:21:25 AM (3 years ago)
Author:
Chris Dumez
Message:

Vectorize StereoPanner's panToTargetValue()
https://bugs.webkit.org/show_bug.cgi?id=217765

Reviewed by Geoffrey Garen.

Vectorize StereoPanner's panToTargetValue().

No new tests, no Web-facing behavior change.

  • platform/audio/StereoPanner.cpp:

(WebCore::StereoPanner::panToTargetValue):

  • platform/audio/VectorMath.cpp:

(WebCore::VectorMath::multiplyByScalar):
(WebCore::VectorMath::multiplyByScalarThenAddToOutput):
(WebCore::VectorMath::multiplyByScalarThenAddToVector):

  • platform/audio/VectorMath.h:
Location:
trunk/Source/WebCore
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/Source/WebCore/ChangeLog

    r268522 r268539  
     1 2020-10-15  Chris Dumez  <cdumez@apple.com>
     2
     3        Vectorize StereoPanner's panToTargetValue()
     4        https://bugs.webkit.org/show_bug.cgi?id=217765
     5
     6        Reviewed by Geoffrey Garen.
     7
     8        Vectorize StereoPanner's panToTargetValue().
     9
     10        No new tests, no Web-facing behavior change.
     11
     12        * platform/audio/StereoPanner.cpp:
     13        (WebCore::StereoPanner::panToTargetValue):
     14        * platform/audio/VectorMath.cpp:
     15        (WebCore::VectorMath::multiplyByScalar):
     16        (WebCore::VectorMath::multiplyByScalarThenAddToOutput):
     17        (WebCore::VectorMath::multiplyByScalarThenAddToVector):
     18        * platform/audio/VectorMath.h:
     19
    1 20 2020-10-15  Chris Lord  <clord@igalia.com>
    2 21
  • trunk/Source/WebCore/platform/audio/StereoPanner.cpp

    r265962 r268539  
    3030#if ENABLE(WEB_AUDIO)
    3131
     32#include "VectorMath.h"
    3233#include <wtf/MathExtras.h>
    3334
     
    117118    if (!sourceL || !sourceR || !destinationL || !destinationR)
    118119        return;
    119    
     120
    120121    float targetPan = clampTo(panValue, -1.0, 1.0);
    121    
    122     int n = framesToProcess;
    123122   
    124123    if (numberOfInputChannels == 1) {
     
    128127        double gainR = sin(panRadian);
    129128       
    130         while (n--) {
    131             float inputL = *sourceL++;
    132             *destinationL++ = static_cast<float>(inputL * gainL);
    133             *destinationR++ = static_cast<float>(inputL * gainR);
    134         }
     129        VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
     130        VectorMath::multiplyByScalar(sourceL, gainR, destinationR, framesToProcess);
    135131    } else {
    136132        double panRadian = (targetPan <= 0 ? targetPan + 1 : targetPan) * piOverTwoDouble;
     
    138134        double gainL = cos(panRadian);
    139135        double gainR = sin(panRadian);
    140        
    141         while (n--) {
    142             float inputL = *sourceL++;
    143             float inputR = *sourceR++;
    144             if (targetPan <= 0) {
    145                 *destinationL++ = static_cast<float>(inputL + inputR * gainL);
    146                 *destinationR++ = static_cast<float>(inputR * gainR);
    147             } else {
    148                 *destinationL++ = static_cast<float>(inputL * gainL);
    149                 *destinationR++ = static_cast<float>(inputR + inputL * gainR);
    150             }
     136
     137        if (targetPan <= 0) {
     138            VectorMath::multiplyByScalarThenAddToVector(sourceR, gainL, sourceL, destinationL, framesToProcess);
     139            VectorMath::multiplyByScalar(sourceR, gainR, destinationR, framesToProcess);
     140        } else {
     141            VectorMath::multiplyByScalar(sourceL, gainL, destinationL, framesToProcess);
     142            VectorMath::multiplyByScalarThenAddToVector(sourceL, gainR, sourceR, destinationR, framesToProcess);
    151143        }
    152144    }
  • trunk/Source/WebCore/platform/audio/VectorMath.cpp

    r268506 r268539  
    5353// On the Mac we use the highly optimized versions in Accelerate.framework
    5454
    55 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
    56 {
    57     vDSP_vsmul(inputVector, 1, &scale, outputVector, 1, numberOfElementsToProcess);
     55void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
     56{
     57    vDSP_vsmul(inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess);
    5858}
    5959
     
    8787}
    8888
    89 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
    90 {
    91     vDSP_vsma(inputVector, 1, &scale, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
     89void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
     90{
     91    vDSP_vsma(inputVector, 1, &scalar, outputVector, 1, outputVector, 1, numberOfElementsToProcess);
     92}
     93
     94void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
     95{
     96    vDSP_vsma(inputVector1, 1, &scalar, inputVector2, 1, outputVector, 1, numberOfElementsToProcess);
    9297}
    9398
     
    129134}
    130135
    131 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
     136void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess)
     137{
     138    multiplyByScalarThenAddToOutput(inputVector1, scalar, outputVector, numberOfElementsToProcess);
     139    add(outputVector, inputVector2, outputVector, numberOfElementsToProcess);
     140}
     141
     142void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
    132143{
    133144    size_t n = numberOfElementsToProcess;
     
    136147    // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
    137148    while (!is16ByteAligned(inputVector) && n) {
    138         *outputVector += scale * *inputVector;
     149        *outputVector += scalar * *inputVector;
    139150        inputVector++;
    140151        outputVector++;
     
    149160    __m128 dest;
    150161    __m128 temp;
    151     __m128 mScale = _mm_set_ps1(scale);
     162    __m128 mScale = _mm_set_ps1(scalar);
    152163
    153164    bool destAligned = is16ByteAligned(outputVector);
     
    175186    const float* endP = outputVector + n - tailFrames;
    176187
    177     float32x4_t k = vdupq_n_f32(scale);
     188    float32x4_t k = vdupq_n_f32(scalar);
    178189    while (outputVector < endP) {
    179190        float32x4_t source = vld1q_f32(inputVector);
     
    189200#endif
    190201    while (n--) {
    191         *outputVector += *inputVector * scale;
     202        *outputVector += *inputVector * scalar;
    192203        ++inputVector;
    193204        ++outputVector;
     
    195206}
    196207
    197 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess)
     208void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess)
    198209{
    199210    size_t n = numberOfElementsToProcess;
     
    202213    // If the inputVector address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
    203214    while (!is16ByteAligned(inputVector) && n) {
    204         *outputVector = scale * *inputVector;
     215        *outputVector = scalar * *inputVector;
    205216        inputVector++;
    206217        outputVector++;
     
    210221    // Now the inputVector address is aligned and start to apply SSE.
    211222    size_t group = n / 4;
    212     __m128 mScale = _mm_set_ps1(scale);
     223    __m128 mScale = _mm_set_ps1(scalar);
    213224    __m128* pSource;
    214225    __m128* pDest;
     
    244255    while (outputVector < endP) {
    245256        float32x4_t source = vld1q_f32(inputVector);
    246         vst1q_f32(outputVector, vmulq_n_f32(source, scale));
     257        vst1q_f32(outputVector, vmulq_n_f32(source, scalar));
    247258
    248259        inputVector += 4;
     
    252263#endif
    253264    while (n--) {
    254         *outputVector = scale * *inputVector;
     265        *outputVector = scalar * *inputVector;
    255266        ++inputVector;
    256267        ++outputVector;
  • trunk/Source/WebCore/platform/audio/VectorMath.h

    r268506 r268539  
    3232namespace VectorMath {
    3333
    34 // Multiples inputVector by scalar then adds the result to outputVector (vsma).
     34// Multiples inputVector by scalar then adds the result to outputVector (simplified vsma).
    3535// for (n = 0; n < numberOfElementsToProcess; ++n)
    36 //     outputVector[n] += inputVector[n] * scale;
    37 void multiplyByScalarThenAddToOutput(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
     36//     outputVector[n] += inputVector[n] * scalar;
     37void multiplyByScalarThenAddToOutput(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
     38
     39// Adds a vector inputVector2 to the product of a scalar value and a single-precision vector inputVector1 (vsma).
     40// for (n = 0; n < numberOfElementsToProcess; ++n)
     41//     outputVector[n] = inputVector1[n] * scalar + inputVector2[n];
     42void multiplyByScalarThenAddToVector(const float* inputVector1, float scalar, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
    3843
    3944// Multiplies the sum of two vectors by a scalar value (vasm).
    4045void addVectorsThenMultiplyByScalar(const float* inputVector1, const float* inputVector2, float scalar, float* outputVector, size_t numberOfElementsToProcess);
    4146
    42 void multiplyByScalar(const float* inputVector, float scale, float* outputVector, size_t numberOfElementsToProcess);
     47void multiplyByScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
    4348void addScalar(const float* inputVector, float scalar, float* outputVector, size_t numberOfElementsToProcess);
    4449void add(const float* inputVector1, const float* inputVector2, float* outputVector, size_t numberOfElementsToProcess);
Note: See TracChangeset for help on using the changeset viewer.