Changeset 110744 in webkit
- Timestamp: Mar 14, 2012 2:01:37 PM (12 years ago)
- Location: trunk/Source/WebCore
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Source/WebCore/ChangeLog
r110741 r110744 1 2012-03-14 Xingnan Wang <xingnan.wang@intel.com> 2 3 Optimize the multiply-add in Biquad.cpp::process 4 https://bugs.webkit.org/show_bug.cgi?id=75528 5 6 Reviewed by Chris Rogers. 7 8 Pipeline the multiply-add with SSE2 instructions and get about 20% improvement for the function. 9 10 * platform/audio/Biquad.cpp: 11 (WebCore::Biquad::process): 12 1 13 2012-03-14 James Robinson <jamesr@chromium.org> 2 14 -
trunk/Source/WebCore/platform/audio/Biquad.cpp
r109458 r110744 97 97 double a2 = m_a2; 98 98 99 // Optimize the hot multiply-add by pipelining with SSE2 instructions. 100 #ifdef __SSE2__ 101 double na1 = -a1; 102 double na2 = -a2; 103 104 __asm__( 105 "movl %4, %%edx\n\t" // move sourceP to edx 106 "movl %5, %%ecx\n\t" // move destP to ecx 107 "movl %6, %%eax\n\t" // move n to eax 108 "testl %%eax, %%eax\n\t" 109 "je .LabelEnd\n\t" 110 "movss (%%edx), %%xmm7\n\t" // load x to xmm7[63:0] 111 "cvtss2sd %%xmm7, %%xmm1\n\t" // convert x from float to double 112 "movlpd %1, %%xmm0\n\t" // move x2 to xmm0[63:0] 113 "movlpd %9, %%xmm2\n\t" // move b2 to xmm2[63:0] 114 "movlpd %7, %%xmm3\n\t" // move b0 to xmm3[63:0] 115 "movhpd %0, %%xmm0\n\t" // move x1 to xmm0[127:64] ----> (x1 x2) 116 "movhpd %3, %%xmm1\n\t" // move y2 to xmm1[127:64] ----> (y2 x ) 117 "movhpd %8, %%xmm2\n\t" // move b1 to xmm2[127:64] ----> (b1 b2) 118 "movhpd %11, %%xmm3\n\t" // move a2 to xmm3[127:64] ----> (a2 b0) 119 "movlpd %2, %%xmm4\n\t" // move y1 to xmm4[63:0] 120 ".LabelLoop:\n\t" 121 "addl $4, %%edx\n\t" // sourceP++ 122 "movapd %%xmm0, %%xmm5\n\t" // copy (x1 x2) 123 "movapd %%xmm1, %%xmm6\n\t" // copy (y2 x ) 124 "shufpd $0, %%xmm4, %%xmm1\n\t" // y2=y1 125 "mulpd %%xmm2, %%xmm5\n\t" // (x1*b1 x2*b2) 126 "mulpd %%xmm3, %%xmm6\n\t" // (y2*a2 x *b0) 127 "shufpd $1, %%xmm1, %%xmm0\n\t" // x2=x1 x1=x 128 "mulsd %10, %%xmm4\n\t" // a1*y1 129 "addpd %%xmm6, %%xmm5\n\t" // (x1*b1+y2*a2 x2*b2+x*b0) 130 "subl $1, %%eax\n\t" // n-- 131 "movapd %%xmm5, %%xmm6\n\t" 132 "movss (%%edx), %%xmm7\n\t" // load x 133 "cvtss2sd %%xmm7, %%xmm1\n\t" // cvt x from float to double x = new x 134 "addsd %%xmm4, %%xmm5\n\t" // a1*y1 + (x2*b2+x*b0) 135 "shufpd $1, %%xmm6, %%xmm6\n\t" // (x1*b1+y2*a2 x2*b2+x*b0) -> (x2*b2+x*b0 x1*b1+y2*a2) 136 "addsd %%xmm6, %%xmm5\n\t" // y 137 "cvtsd2ss %%xmm5, %%xmm7\n\t" 138 "movss %%xmm7, (%%ecx)\n\t" // y -> *destP 139 "movapd %%xmm5, %%xmm4\n\t" // y1 = y 140 "addl $4, %%ecx\n\t" // destP++ 141 "testl %%eax, %%eax\n\t" 142 
"jne .LabelLoop\n\t" // while() 143 "movhpd %%xmm0, %0\n\t" 144 "movlpd %%xmm0, %1\n\t" 145 "movlpd %%xmm4, %2\n\t" 146 "movhpd %%xmm1, %3\n\t" 147 ".LabelEnd:\n\t" 148 :"+m"(x1), "+m"(x2), "+m"(y1), "+m"(y2) 149 :"m"(sourceP), "m"(destP), "m"(n), "m"(b0), "m"(b1), "m"(b2), "m"(na1), "m"(na2) 150 :"eax", "edx", "ecx", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 151 ); 152 #else 99 153 while (n--) { 100 154 // FIXME: this can be optimized by pipelining the multiply adds... … … 110 164 y1 = y; 111 165 } 166 #endif 112 167 113 168 // Local variables back to member. Flush denormals here so we
Note: See TracChangeset for help on using the changeset viewer.