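Small correction to last commit: move the (float*) cast into the LVF macro and use LVF for the domkerv loads in dirpyr_channel and dirpyr_shmap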
This commit is contained in:
parent
609814afae
commit
a79e4bc24b
@ -458,7 +458,7 @@ SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** da
|
|||||||
|
|
||||||
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
||||||
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
||||||
dirwtv = _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
|
dirwtv = LVF(domkerv[indexihlp][indexjhlp]) / (vabsf(dftemp1v - dftemp2v) + thousandv);
|
||||||
valv += dirwtv * dftemp2v;
|
valv += dirwtv * dftemp2v;
|
||||||
normv += dirwtv;
|
normv += dirwtv;
|
||||||
}
|
}
|
||||||
|
@ -22,18 +22,18 @@ typedef __m128i vint2;
|
|||||||
//
|
//
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
|
#if ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || __GNUC__ > 4) && (!defined(WIN32) || defined( __x86_64__ ))
|
||||||
#define LVF(x) _mm_load_ps(&x)
|
#define LVF(x) _mm_load_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
||||||
#define LVF(x) _mm_loadu_ps(&x)
|
#define LVF(x) _mm_loadu_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#define LVF(x) _mm_load_ps(&x)
|
#define LVF(x) _mm_load_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
|
@ -513,7 +513,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
|||||||
|
|
||||||
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
for (int jnbr = j - scalewin, indexjhlp = 0; jnbr <= j + scalewin; jnbr += scale, indexjhlp++) {
|
||||||
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
dftemp2v = LVFU(data_fine[inbr][jnbr]);
|
||||||
dirwtv = ( _mm_load_ps((float*)&domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
|
dirwtv = ( LVF(domkerv[indexihlp][indexjhlp]) * rangefn[_mm_cvttps_epi32(vabsf(dftemp2v - dftemp1v))] );
|
||||||
valv += dirwtv * dftemp2v;
|
valv += dirwtv * dftemp2v;
|
||||||
normv += dirwtv;
|
normv += dirwtv;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user