diff --git a/rtengine/dirpyr_equalizer.cc b/rtengine/dirpyr_equalizer.cc index 9d0d2f9d5..f82b7f8bf 100644 --- a/rtengine/dirpyr_equalizer.cc +++ b/rtengine/dirpyr_equalizer.cc @@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) { dftemp2v = LVFU(data_fine[inbr][jnbr]); - dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv); + dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv); valv += dirwtv * dftemp2v; normv += dirwtv; } diff --git a/rtengine/helpersse2.h b/rtengine/helpersse2.h index d4290bacc..da1691748 100644 --- a/rtengine/helpersse2.h +++ b/rtengine/helpersse2.h @@ -22,18 +22,18 @@ typedef __m128i vint2; // #ifdef __GNUC__ #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ )) -#define LVF(x) _mm_load_ps(&x) +#define LVF(x) _mm_load_ps((float*)&x) #define LVFU(x) _mm_loadu_ps(&x) #define STVF(x,y) _mm_store_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y) #else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds -#define LVF(x) _mm_loadu_ps(&x) +#define LVF(x) _mm_loadu_ps((float*)&x) #define LVFU(x) _mm_loadu_ps(&x) #define STVF(x,y) _mm_storeu_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y) #endif #else -#define LVF(x) _mm_load_ps(&x) +#define LVF(x) _mm_load_ps((float*)&x) #define LVFU(x) _mm_loadu_ps(&x) #define STVF(x,y) _mm_store_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y) diff --git a/rtengine/shmap.cc b/rtengine/shmap.cc index 38ac68a12..ac77da25d 100644 --- a/rtengine/shmap.cc +++ b/rtengine/shmap.cc @@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) { dftemp2v = LVFU(data_fine[inbr][jnbr]); - dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] ); + dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] ); valv += dirwtv * dftemp2v; normv += dirwtv; }