Use vsqrtf instead of _mm_sqrt_ps

This commit is contained in:
heckflosse 2018-11-05 16:02:32 +01:00
parent 0983817434
commit 8a31f0368c
4 changed files with 6 additions and 6 deletions

View File

@@ -731,7 +731,7 @@ float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float
gxv = (LVFU(rg[x + 1]) - LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + w])); gxv = (LVFU(rg[x + 1]) - LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + w]));
gyv = (LVFU(rg[x + w]) - LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + 1])); gyv = (LVFU(rg[x + w]) - LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + 1]));
//Apply power to the magnitude of the gradient to get the edge stopping function. //Apply power to the magnitude of the gradient to get the edge stopping function.
_mm_storeu_ps( &a[x + w * y], Scalev * pow_F((zd5v * _mm_sqrt_ps(gxv * gxv + gyv * gyv + sqrepsv)), EdgeStoppingv) ); _mm_storeu_ps( &a[x + w * y], Scalev * pow_F((zd5v * vsqrtf(gxv * gxv + gyv * gyv + sqrepsv)), EdgeStoppingv) );
} }
for(; x < w1; x++) { for(; x < w1; x++) {

View File

@@ -191,7 +191,7 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
int j; int j;
for(j = 0; j < srcwidth - 3; j += 4) { for(j = 0; j < srcwidth - 3; j += 4) {
_mm_storeu_ps(&tmpChr[i][j], _mm_sqrt_ps(SQRV(LVFU(l_b[i][j])) + SQRV(LVFU(l_a[i][j]))) / div); _mm_storeu_ps(&tmpChr[i][j], vsqrtf(SQRV(LVFU(l_b[i][j])) + SQRV(LVFU(l_a[i][j]))) / div);
} }
for(; j < srcwidth; j++) { for(; j < srcwidth; j++) {

View File

@@ -4407,7 +4407,7 @@ void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW
av = LVFU (lold->a[i][k]); av = LVFU (lold->a[i][k]);
bv = LVFU (lold->b[i][k]); bv = LVFU (lold->b[i][k]);
STVF (HHBuffer[k], xatan2f (bv, av)); STVF (HHBuffer[k], xatan2f (bv, av));
STVF (CCBuffer[k], _mm_sqrt_ps (SQRV (av) + SQRV (bv)) / c327d68v); STVF (CCBuffer[k], vsqrtf (SQRV (av) + SQRV (bv)) / c327d68v);
} }
for (; k < W; k++) { for (; k < W; k++) {

View File

@@ -736,7 +736,7 @@ void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const
av = LVFU(lab->a[i][j]); av = LVFU(lab->a[i][j]);
bv = LVFU(lab->b[i][j]); bv = LVFU(lab->b[i][j]);
huev = xatan2f(bv, av); huev = xatan2f(bv, av);
chrov = _mm_sqrt_ps(SQRV(av) + SQRV(bv)) / c327d68v; chrov = vsqrtf(SQRV(av) + SQRV(bv)) / c327d68v;
_mm_storeu_ps(&varhue[i1][j1], huev); _mm_storeu_ps(&varhue[i1][j1], huev);
_mm_storeu_ps(&varchro[i1][j1], chrov); _mm_storeu_ps(&varchro[i1][j1], chrov);
@@ -1104,7 +1104,7 @@ void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const
bv = LVFU(labco->b[i1][col]); bv = LVFU(labco->b[i1][col]);
STVF(atan2Buffer[col], xatan2f(bv, av)); STVF(atan2Buffer[col], xatan2f(bv, av));
cv = _mm_sqrt_ps(SQRV(av) + SQRV(bv)); cv = vsqrtf(SQRV(av) + SQRV(bv));
yv = av / cv; yv = av / cv;
xv = bv / cv; xv = bv / cv;
xyMask = vmaskf_eq(zerov, cv); xyMask = vmaskf_eq(zerov, cv);
@@ -1992,7 +1992,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
__m128 av = LVFU(WavCoeffs_a0[i * W_L + k]); __m128 av = LVFU(WavCoeffs_a0[i * W_L + k]);
__m128 bv = LVFU(WavCoeffs_b0[i * W_L + k]); __m128 bv = LVFU(WavCoeffs_b0[i * W_L + k]);
__m128 huev = xatan2f(bv, av); __m128 huev = xatan2f(bv, av);
__m128 chrv = _mm_sqrt_ps(SQRV(av) + SQRV(bv)); __m128 chrv = vsqrtf(SQRV(av) + SQRV(bv));
STVF(huebuffer[k], huev); STVF(huebuffer[k], huev);
STVF(chrbuffer[k], chrv); STVF(chrbuffer[k], chrv);
} }