Use vsqrtf instead of _mm_sqrt_ps

2018-11-05 16:02:32 +01:00 · 2018-11-05 16:02:32 +01:00 · 8a31f0368c
commit 8a31f0368c
parent 0983817434
4 changed files with 6 additions and 6 deletions
--- a/rtengine/EdgePreservingDecomposition.cc
+++ b/rtengine/EdgePreservingDecomposition.cc
@@ -731,7 +731,7 @@ float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float
                gxv = (LVFU(rg[x + 1]) -  LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + w]));
                gyv = (LVFU(rg[x + w]) -  LVFU(rg[x])) + (LVFU(rg[x + w + 1]) - LVFU(rg[x + 1]));
                //Apply power to the magnitude of the gradient to get the edge stopping function.
-                _mm_storeu_ps( &a[x + w * y], Scalev * pow_F((zd5v * _mm_sqrt_ps(gxv * gxv + gyv * gyv + sqrepsv)), EdgeStoppingv) );
+                _mm_storeu_ps( &a[x + w * y], Scalev * pow_F((zd5v * vsqrtf(gxv * gxv + gyv * gyv + sqrepsv)), EdgeStoppingv) );
            }
            for(; x < w1; x++) {
--- a/rtengine/dirpyr_equalizer.cc
+++ b/rtengine/dirpyr_equalizer.cc
@@ -191,7 +191,7 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
                int j;
                for(j = 0; j < srcwidth - 3; j += 4) {
-                    _mm_storeu_ps(&tmpChr[i][j], _mm_sqrt_ps(SQRV(LVFU(l_b[i][j])) + SQRV(LVFU(l_a[i][j]))) / div);
+                    _mm_storeu_ps(&tmpChr[i][j], vsqrtf(SQRV(LVFU(l_b[i][j])) + SQRV(LVFU(l_a[i][j]))) / div);
                }
                for(; j < srcwidth; j++) {
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@@ -4407,7 +4407,7 @@ void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW
                    av = LVFU (lold->a[i][k]);
                    bv = LVFU (lold->b[i][k]);
                    STVF (HHBuffer[k], xatan2f (bv, av));
-                    STVF (CCBuffer[k], _mm_sqrt_ps (SQRV (av) + SQRV (bv)) / c327d68v);
+                    STVF (CCBuffer[k], vsqrtf (SQRV (av) + SQRV (bv)) / c327d68v);
                }
                for (; k < W; k++) {
--- a/rtengine/ipwavelet.cc
+++ b/rtengine/ipwavelet.cc
@@ -736,7 +736,7 @@ void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const
                        av = LVFU(lab->a[i][j]);
                        bv = LVFU(lab->b[i][j]);
                        huev = xatan2f(bv, av);
-                        chrov = _mm_sqrt_ps(SQRV(av) + SQRV(bv)) / c327d68v;
+                        chrov = vsqrtf(SQRV(av) + SQRV(bv)) / c327d68v;
                        _mm_storeu_ps(&varhue[i1][j1], huev);
                        _mm_storeu_ps(&varchro[i1][j1], chrov);
@@ -1104,7 +1104,7 @@ void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const
                                bv = LVFU(labco->b[i1][col]);
                                STVF(atan2Buffer[col], xatan2f(bv, av));
-                                cv = _mm_sqrt_ps(SQRV(av) + SQRV(bv));
+                                cv = vsqrtf(SQRV(av) + SQRV(bv));
                                yv = av / cv;
                                xv = bv / cv;
                                xyMask = vmaskf_eq(zerov, cv);
@@ -1992,7 +1992,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
                    __m128 av = LVFU(WavCoeffs_a0[i * W_L + k]);
                    __m128 bv = LVFU(WavCoeffs_b0[i * W_L + k]);
                    __m128 huev = xatan2f(bv, av);
-                    __m128 chrv = _mm_sqrt_ps(SQRV(av) + SQRV(bv));
+                    __m128 chrv = vsqrtf(SQRV(av) + SQRV(bv));
                    STVF(huebuffer[k], huev);
                    STVF(chrbuffer[k], chrv);
                }