From 87414bc8be82751a584dc8fa92f04708abf25be0 Mon Sep 17 00:00:00 2001 From: Philip Rinn Date: Fri, 29 Mar 2013 15:19:22 +0100 Subject: [PATCH] Solve bug on Windows when using OpenMP+SSE in a more elegant way (see issue 1806) --- CMakeLists.txt | 13 ++----------- rtengine/CMakeLists.txt | 2 +- rtengine/gauss.h | 25 ++++++++++++++++++------- rtengine/ipsharpen.cc | 5 ++++- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b5eb37e4..d79f358ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,16 +30,7 @@ set (CACHE_NAME_SUFFIX "" CACHE STRING "RawTherapee's cache folder suffix (leave set (PROC_TARGET_NUMBER 0 CACHE STRING "Selected target processor from the list above (taken from ProcessorTargets.cmake)") # The following line set special compilation flags for RTEngine, and will be added to CMAKE_CXX_FLAGS -# Due to a bug in GCC when using OpenMP+SSE, -mstackrealign will be added to your flags -set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine; -mstackrealign will be added to your flags") - -# mandatory flags for rtengine for all platforms, depending on the bit depth -set (RTENGINE_CXX_FLAGS_MANDATORY "-mstackrealign") -if (CMAKE_SIZEOF_VOID_P EQUAL 4) - set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} -mpreferred-stack-boundary=4") -#else (CMAKE_SIZEOF_VOID_P EQUAL 8) -# set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} ") -endif (CMAKE_SIZEOF_VOID_P EQUAL 4) +set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine") #loading the processor targets list include (ProcessorTargets.cmake) @@ -281,7 +272,7 @@ else () endif () # Get c++ and linker flags for rtengine (the gui's c++ flags may have less flags) -set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}") +set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS}") set(LFLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${UPPER_CMAKE_BUILD_TYPE}}") set(ABOUT_COMMAND_WITH_ARGS ${CMAKE_COMMAND} diff --git a/rtengine/CMakeLists.txt b/rtengine/CMakeLists.txt index 2e22f2144..dfac79d92 100644 --- a/rtengine/CMakeLists.txt +++ b/rtengine/CMakeLists.txt @@ -29,7 +29,7 @@ IF (BUILD_SHARED_LIBS) install (TARGETS rtengine DESTINATION ${LIBDIR}) ENDIF (BUILD_SHARED_LIBS) -set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}") +set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS}") target_link_libraries (rtengine rtexif ${EXTRA_LIB} ${GOBJECT_LIBRARIES} ${GTHREAD_LIBRARIES} ${GLIB2_LIBRARIES} ${GLIBMM_LIBRARIES} ${LCMS_LIBRARIES} ${EXPAT_LIBRARIES} ${FFTW3F_LIBRARIES} ${IPTCDATA_LIBRARIES} diff --git a/rtengine/gauss.h b/rtengine/gauss.h index 322fb1774..7605b8789 100644 --- a/rtengine/gauss.h +++ b/rtengine/gauss.h @@ -77,9 +77,12 @@ template void gaussVertical3 (T** src, T** dst, AlignedBufferMP } } -#ifdef __SSE__ +#ifdef __SSE__ +#ifdef WIN32 +template __attribute__((force_align_arg_pointer)) void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { +#else template void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { - +#endif __m128 Tv,Tm1v,Tp1v; __m128 c0v,c1v; c0v = _mm_set1_ps(c0); @@ -115,9 +118,11 @@ template void gaussVertical3Sse (T** src, T** dst, int W, int H, const } - +#ifdef WIN32 +template __attribute__((force_align_arg_pointer)) void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { +#else template void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { - +#endif float tmp[W][4] __attribute__ ((aligned (16))); __m128 Tv,Tm1v,Tp1v; @@ -170,8 +175,11 @@ template void gaussHorizontal3Sse (T** src, T** dst, int W, int H, cons // fast gaussian approximation if the support window is large +#ifdef WIN32 +template __attribute__((force_align_arg_pointer)) void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) { +#else template void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) { - +#endif if (sigma<0.25) { // dont perform filtering if (src!=dst) @@ -406,9 +414,12 @@ template void gaussHorizontal (T** src, T** dst, AlignedBufferMP __attribute__((force_align_arg_pointer)) void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) { +#else template void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) { - +#endif if (sigma<0.25) { // dont perform filtering if (src!=dst) diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index 60c066967..0421347a9 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -40,8 +40,11 @@ namespace rtengine { extern const Settings* settings; - +#if defined( __SSE__ ) && defined( WIN32 ) +__attribute__((force_align_arg_pointer)) void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) { +#else void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) { +#endif const float dampingFac=-2.0/(damping*damping);