Solve bug on Windows when using OpenMP+SSE in a more elegant way (see issue 1806)

This commit is contained in:
Philip Rinn
2013-03-29 15:19:22 +01:00
parent 5bd68ce99a
commit 87414bc8be
4 changed files with 25 additions and 20 deletions

View File

@@ -77,9 +77,12 @@ template<class T> void gaussVertical3 (T** src, T** dst, AlignedBufferMP<double>
}
}
#ifdef __SSE__
#ifdef __SSE__
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#else
template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#endif
__m128 Tv,Tm1v,Tp1v;
__m128 c0v,c1v;
c0v = _mm_set1_ps(c0);
@@ -115,9 +118,11 @@ template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const
}
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#else
template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#endif
float tmp[W][4] __attribute__ ((aligned (16)));
__m128 Tv,Tm1v,Tp1v;
@@ -170,8 +175,11 @@ template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, cons
// fast Gaussian approximation if the support window is large
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
#else
template<class T> void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
#endif
if (sigma<0.25) {
// don't perform filtering
if (src!=dst)
@@ -406,9 +414,12 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBufferMP<double
#endif
}
#ifdef __SSE__
#ifdef __SSE__
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
#else
template<class T> void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
#endif
if (sigma<0.25) {
// don't perform filtering
if (src!=dst)