small correction to last commit

This commit is contained in:
heckflosse 2016-04-22 13:25:52 +02:00
parent 609814afae
commit a79e4bc24b
3 changed files with 5 additions and 5 deletions

View File

@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
dftemp2v = LVFU(data_fine[inbr][jnbr]);
dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
valv += dirwtv * dftemp2v;
normv += dirwtv;
}

View File

@ -22,18 +22,18 @@ typedef __m128i vint2;
//
#ifdef __GNUC__
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
#define LVF(x) _mm_load_ps(&x)
#define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
#define LVF(x) _mm_loadu_ps(&x)
#define LVF(x) _mm_loadu_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_storeu_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)
#endif
#else
#define LVF(x) _mm_load_ps(&x)
#define LVF(x) _mm_load_ps((float*)&x)
#define LVFU(x) _mm_loadu_ps(&x)
#define STVF(x,y) _mm_store_ps(&x,y)
#define STVFU(x,y) _mm_storeu_ps(&x,y)

View File

@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
dftemp2v = LVFU(data_fine[inbr][jnbr]);
dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
valv += dirwtv * dftemp2v;
normv += dirwtv;
}