diff --git a/rtengine/helpersse2.h b/rtengine/helpersse2.h index d9a7d6d03..1a4e5d6f8 100644 --- a/rtengine/helpersse2.h +++ b/rtengine/helpersse2.h @@ -9,7 +9,12 @@ #define INLINE inline #endif -#include +#if defined( WIN32 ) && defined(__x86_64__) + #include +#else + #include +#endif + #include typedef __m128d vdouble; diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index fdd3da7a5..625865b02 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -20,7 +20,9 @@ #include "improcfun.h" #include "gauss.h" #include "bilateral2.h" -#include "rt_math.h" +#include "rt_math.h" +#include "sleef.c" +#include "sleefsseavx.c" #ifdef _OPENMP #include @@ -40,28 +42,75 @@ extern const Settings* settings; void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) { - const float dampingFac=2.0/(damping*damping); + const float dampingFac=-2.0/(damping*damping); + +#ifdef __SSE2__ + __m128 Iv,Ov,Uv,zerov,onev,fourv,fivev,dampingFacv,Tv; + zerov = _mm_setzero_ps( ); + onev = _mm_set1_ps( 1.0f ); + fourv = _mm_set1_ps( 4.0f ); + fivev = _mm_set1_ps( 5.0f ); + dampingFacv = _mm_set1_ps( dampingFac ); +#ifdef _OPENMP +#pragma omp for +#endif + for (int i=0; isharpening.enabled==false || params->sharpening.deconvamount<1) return; @@ -74,7 +123,7 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) { tmpI[i][j] = (float)lab->L[i][j]; } - float** tmp = (float**)b2; + float** tmp = (float**)b2; #ifdef _OPENMP #pragma omp parallel @@ -128,7 +177,6 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) { for (int i=0; i