Changeset 104143 in webkit
- Timestamp:
- Jan 5, 2012 4:54:04 AM (12 years ago)
- Location:
- trunk/Source/WebCore
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r104142 r104143 1 2012-01-05 Xingnan Wang <xingnan.wang@intel.com> 2 3 Add an SSE2-optimized function zvmul to VectorMath 4 https://bugs.webkit.org/show_bug.cgi?id=74842 5 6 Reviewed by Kenneth Russell. 7 8 Use zvmul in FFTFrameFFMPEG.cpp::multiply() and FFTFrameMac.cpp::multiply(). 9 10 * platform/audio/VectorMath.cpp: 11 (WebCore::VectorMath::zvmul): 12 * platform/audio/VectorMath.h: 13 * platform/audio/ffmpeg/FFTFrameFFMPEG.cpp: 14 (WebCore::FFTFrame::multiply): 15 * platform/audio/mac/FFTFrameMac.cpp: 16 (WebCore::FFTFrame::multiply): 17 1 18 2012-01-05 Alpha Lam <hclam@chromium.org> 2 19 -
trunk/Source/WebCore/platform/audio/VectorMath.cpp
// Excerpt of VectorMath.cpp as of r104143: the two platform variants of zvmul().
// NOTE(review): in the full file these definitions sit in the two arms of an
// OS(DARWIN) conditional that opens above this excerpt; `__APPLE__` stands in
// for it here so the excerpt is self-contained -- confirm when merging back.
#if defined(__APPLE__)

// Multiplies two split-complex vectors element-wise using vDSP:
//   (realDestP + i*imagDestP)[k] = (real1P + i*imag1P)[k] * (real2P + i*imag2P)[k]
// for k in [0, framesToProcess). All vectors are accessed with stride 1.
void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
{
    DSPSplitComplex sc1;
    DSPSplitComplex sc2;
    DSPSplitComplex dest;
    // DSPSplitComplex stores non-const float pointers, so the const source
    // pointers must be cast explicitly; a plain assignment does not compile in C++.
    sc1.realp = const_cast<float*>(real1P);
    sc1.imagp = const_cast<float*>(imag1P);
    sc2.realp = const_cast<float*>(real2P);
    sc2.imagp = const_cast<float*>(imag2P);
    dest.realp = realDestP;
    dest.imagp = imagDestP;
    // The trailing 1 selects normal (non-conjugated) multiplication.
#if defined(__ppc__) || defined(__i386__)
    ::zvmul(&sc1, 1, &sc2, 1, &dest, 1, framesToProcess, 1);
#else
    vDSP_zvmul(&sc1, 1, &sc2, 1, &dest, 1, framesToProcess, 1);
#endif
}

#else

// Multiplies two split-complex vectors element-wise:
//   realDestP[k] = real1P[k] * real2P[k] - imag1P[k] * imag2P[k]
//   imagDestP[k] = real1P[k] * imag2P[k] + imag1P[k] * real2P[k]
// for k in [0, framesToProcess).
//
// The destination may be the same buffers as either source (the FFTFrame
// callers multiply in place), so every element is fully computed before
// anything is written back.
void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
{
    // size_t, not unsigned: the counter must cover the full range of framesToProcess.
    size_t i = 0;
#ifdef __SSE2__
    // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
    // Otherwise, fall through to the scalar code below.
    if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(real2P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F)
        && !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) {

        // Process four frames per iteration; the scalar loop picks up the remainder.
        size_t endSize = framesToProcess - framesToProcess % 4;
        while (i < endSize) {
            // All four inputs are loaded before either store, so in-place use is safe here.
            __m128 real1 = _mm_load_ps(real1P + i);
            __m128 real2 = _mm_load_ps(real2P + i);
            __m128 imag1 = _mm_load_ps(imag1P + i);
            __m128 imag2 = _mm_load_ps(imag2P + i);
            __m128 real = _mm_mul_ps(real1, real2);
            real = _mm_sub_ps(real, _mm_mul_ps(imag1, imag2));
            __m128 imag = _mm_mul_ps(real1, imag2);
            imag = _mm_add_ps(imag, _mm_mul_ps(imag1, real2));
            _mm_store_ps(realDestP + i, real);
            _mm_store_ps(imagDestP + i, imag);
            i += 4;
        }
    }
#endif
    for (; i < framesToProcess; ++i) {
        // Read and compute both results before storing them, in case the
        // destination is the same as one of the sources.
        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
        realDestP[i] = realResult;
        imagDestP[i] = imagResult;
    }
}

#endif // OS(DARWIN)
trunk/Source/WebCore/platform/audio/VectorMath.h
r102702 r104143 38 38 void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess); 39 39 40 // Multiplies two complex vectors. 41 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess); 42 40 43 } // namespace VectorMath 41 44 -
trunk/Source/WebCore/platform/audio/ffmpeg/FFTFrameFFMPEG.cpp
r95901 r104143 116 116 const float* imagP2 = frame2.imagData(); 117 117 118 unsigned halfSize = fftSize() / 2; 119 float real0 = realP1[0]; 120 float imag0 = imagP1[0]; 121 122 VectorMath::zvmul(realP1, imagP1, realP2, imagP2, realP1, imagP1, halfSize); 123 124 // Multiply the packed DC/nyquist component 125 realP1[0] = real0 * realP2[0]; 126 imagP1[0] = imag0 * imagP2[0]; 127 118 128 // Scale accounts the peculiar scaling of vecLib on the Mac. 119 129 // This ensures the right scaling all the way back to inverse FFT. … … 122 132 float scale = 0.5f; 123 133 124 // Multiply the packed DC/nyquist component 125 realP1[0] *= scale * realP2[0]; 126 imagP1[0] *= scale * imagP2[0]; 127 128 // Complex multiplication. If this loop turns out to be hot then 129 // we should use SSE or other intrinsics to accelerate it. 130 unsigned halfSize = fftSize() / 2; 131 132 for (unsigned i = 1; i < halfSize; ++i) { 133 float realResult = realP1[i] * realP2[i] - imagP1[i] * imagP2[i]; 134 float imagResult = realP1[i] * imagP2[i] + imagP1[i] * realP2[i]; 135 136 realP1[i] = scale * realResult; 137 imagP1[i] = scale * imagResult; 138 } 134 VectorMath::vsmul(realP1, 1, &scale, realP1, 1, halfSize); 135 VectorMath::vsmul(imagP1, 1, &scale, imagP1, 1, halfSize); 139 136 } 140 137 -
trunk/Source/WebCore/platform/audio/mac/FFTFrameMac.cpp
r95901 r104143 36 36 37 37 #include "FFTFrame.h" 38 39 #include "VectorMath.h" 38 40 39 41 namespace WebCore { … … 107 109 const float* imagP2 = frame2.imagData(); 108 110 111 unsigned halfSize = m_FFTSize / 2; 112 float real0 = realP1[0]; 113 float imag0 = imagP1[0]; 114 115 // Complex multiply 116 VectorMath::zvmul(realP1, imagP1, realP2, imagP2, realP1, imagP1, halfSize); 117 118 // Multiply the packed DC/nyquist component 119 realP1[0] = real0 * realP2[0]; 120 imagP1[0] = imag0 * imagP2[0]; 121 109 122 // Scale accounts for vecLib's peculiar scaling 110 123 // This ensures the right scaling all the way back to inverse FFT 111 124 float scale = 0.5f; 112 125 113 // Multiply packed DC/nyquist component 114 realP1[0] *= scale * realP2[0]; 115 imagP1[0] *= scale * imagP2[0]; 116 117 // Multiply the rest, skipping packed DC/Nyquist components 118 DSPSplitComplex sc1 = frame1.dspSplitComplex(); 119 sc1.realp++; 120 sc1.imagp++; 121 122 DSPSplitComplex sc2 = frame2.dspSplitComplex(); 123 sc2.realp++; 124 sc2.imagp++; 125 126 unsigned halfSize = m_FFTSize / 2; 127 128 // Complex multiply 129 vDSP_zvmul(&sc1, 1, &sc2, 1, &sc1, 1, halfSize - 1, 1 /* normal multiplication */); 130 131 // We've previously scaled the packed part, now scale the rest..... 132 vDSP_vsmul(sc1.realp, 1, &scale, sc1.realp, 1, halfSize - 1); 133 vDSP_vsmul(sc1.imagp, 1, &scale, sc1.imagp, 1, halfSize - 1); 126 VectorMath::vsmul(realP1, 1, &scale, realP1, 1, halfSize); 127 VectorMath::vsmul(imagP1, 1, &scale, imagP1, 1, halfSize); 134 128 } 135 129
Note: See TracChangeset
for help on using the changeset viewer.