small correction to last commit

This commit is contained in:
heckflosse 2016-04-22 13:25:52 +02:00
parent 609814afae
commit a79e4bc24b
3 changed files with 5 additions and 5 deletions

View File

@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) { for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
dftemp2v = LVFU(data_fine[inbr][jnbr]); dftemp2v = LVFU(data_fine[inbr][jnbr]);
dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv); dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
valv += dirwtv * dftemp2v; valv += dirwtv * dftemp2v;
normv += dirwtv; normv += dirwtv;
} }

View File

@ -22,18 +22,18 @@ typedef __m128i vint2;
// //
#ifdef __GNUC__ #ifdef __GNUC__
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ )) #if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
#define LVF(x) _mm_load_ps(&x) #define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x) #define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y) #define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y)
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds #else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
#define LVF(x) _mm_loadu_ps(&x) #define LVF(x) _mm_loadu_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x) #define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_storeu_ps(&x,y) #define STVF(x,y) _mm_storeu_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y)
#endif #endif
#else #else
#define LVF(x) _mm_load_ps(&x) #define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x) #define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y) #define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y)

View File

@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) { for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
dftemp2v = LVFU(data_fine[inbr][jnbr]); dftemp2v = LVFU(data_fine[inbr][jnbr]);
dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] ); dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
valv += dirwtv * dftemp2v; valv += dirwtv * dftemp2v;
normv += dirwtv; normv += dirwtv;
} }