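Small correction to last commit: make the LVF macro cast its argument to float* and use LVF for the domkerv loads in dirpyr_channel and dirpyr_shmap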
This commit is contained in:
parent
609814afae
commit
a79e4bc24b
@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da
|
||||
|
||||
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
||||
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
||||
dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
|
||||
dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
|
||||
valv += dirwtv * dftemp2v;
|
||||
normv += dirwtv;
|
||||
}
|
||||
|
@ -22,18 +22,18 @@ typedef __m128i vint2;
|
||||
//
|
||||
#ifdef __GNUC__
|
||||
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
|
||||
#define LVF(x) _mm_load_ps(&x)
|
||||
#define LVF(x) _mm_load_ps((float*)&x)
|
||||
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
||||
#define LVF(x) _mm_loadu_ps(&x)
|
||||
#define LVF(x) _mm_loadu_ps((float*)&x)
|
||||
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
#endif
|
||||
#else
|
||||
#define LVF(x) _mm_load_ps(&x)
|
||||
#define LVF(x) _mm_load_ps((float*)&x)
|
||||
#define LVFU(x) _mm_loadu_ps(&x)
|
||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||
|
@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
||||
|
||||
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
||||
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
||||
dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
|
||||
dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
|
||||
valv += dirwtv * dftemp2v;
|
||||
normv += dirwtv;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user