Solve bug on Windows when using OpenMP+SSE in a more elegant way (see issue 1806)

This commit is contained in:
Philip Rinn 2013-03-29 15:19:22 +01:00
parent 5bd68ce99a
commit 87414bc8be
4 changed files with 25 additions and 20 deletions

View File

@ -30,16 +30,7 @@ set (CACHE_NAME_SUFFIX "" CACHE STRING "RawTherapee's cache folder suffix (leave
set (PROC_TARGET_NUMBER 0 CACHE STRING "Selected target processor from the list above (taken from ProcessorTargets.cmake)") set (PROC_TARGET_NUMBER 0 CACHE STRING "Selected target processor from the list above (taken from ProcessorTargets.cmake)")
# The following line sets special compilation flags for RTEngine, which will be added to CMAKE_CXX_FLAGS # The following line sets special compilation flags for RTEngine, which will be added to CMAKE_CXX_FLAGS
# Due to a bug in GCC when using OpenMP+SSE, -mstackrealign will be added to your flags set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine")
set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine; -mstackrealign will be added to your flags")
# mandatory flags for rtengine for all platforms, depending on the bit depth
set (RTENGINE_CXX_FLAGS_MANDATORY "-mstackrealign")
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} -mpreferred-stack-boundary=4")
#else (CMAKE_SIZEOF_VOID_P EQUAL 8)
# set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} ")
endif (CMAKE_SIZEOF_VOID_P EQUAL 4)
#loading the processor targets list #loading the processor targets list
include (ProcessorTargets.cmake) include (ProcessorTargets.cmake)
@ -281,7 +272,7 @@ else ()
endif () endif ()
# Get c++ and linker flags for rtengine (the gui's c++ flags may have fewer flags) # Get c++ and linker flags for rtengine (the gui's c++ flags may have fewer flags)
set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}") set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS}")
set(LFLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${UPPER_CMAKE_BUILD_TYPE}}") set(LFLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${UPPER_CMAKE_BUILD_TYPE}}")
set(ABOUT_COMMAND_WITH_ARGS ${CMAKE_COMMAND} set(ABOUT_COMMAND_WITH_ARGS ${CMAKE_COMMAND}

View File

@ -29,7 +29,7 @@ IF (BUILD_SHARED_LIBS)
install (TARGETS rtengine DESTINATION ${LIBDIR}) install (TARGETS rtengine DESTINATION ${LIBDIR})
ENDIF (BUILD_SHARED_LIBS) ENDIF (BUILD_SHARED_LIBS)
set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}") set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS}")
target_link_libraries (rtengine rtexif ${EXTRA_LIB} ${GOBJECT_LIBRARIES} ${GTHREAD_LIBRARIES} target_link_libraries (rtengine rtexif ${EXTRA_LIB} ${GOBJECT_LIBRARIES} ${GTHREAD_LIBRARIES}
${GLIB2_LIBRARIES} ${GLIBMM_LIBRARIES} ${LCMS_LIBRARIES} ${EXPAT_LIBRARIES} ${FFTW3F_LIBRARIES} ${IPTCDATA_LIBRARIES} ${GLIB2_LIBRARIES} ${GLIBMM_LIBRARIES} ${LCMS_LIBRARIES} ${EXPAT_LIBRARIES} ${FFTW3F_LIBRARIES} ${IPTCDATA_LIBRARIES}

View File

@ -77,9 +77,12 @@ template<class T> void gaussVertical3 (T** src, T** dst, AlignedBufferMP<double>
} }
} }
#ifdef __SSE__ #ifdef __SSE__
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#else
template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#endif
__m128 Tv,Tm1v,Tp1v; __m128 Tv,Tm1v,Tp1v;
__m128 c0v,c1v; __m128 c0v,c1v;
c0v = _mm_set1_ps(c0); c0v = _mm_set1_ps(c0);
@ -115,9 +118,11 @@ template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const
} }
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#else
template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
#endif
float tmp[W][4] __attribute__ ((aligned (16))); float tmp[W][4] __attribute__ ((aligned (16)));
__m128 Tv,Tm1v,Tp1v; __m128 Tv,Tm1v,Tp1v;
@ -170,8 +175,11 @@ template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, cons
// fast gaussian approximation if the support window is large // fast gaussian approximation if the support window is large
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
#else
template<class T> void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) { template<class T> void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
#endif
if (sigma<0.25) { if (sigma<0.25) {
// dont perform filtering // dont perform filtering
if (src!=dst) if (src!=dst)
@ -406,9 +414,12 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBufferMP<double
#endif #endif
} }
#ifdef __SSE__ #ifdef __SSE__
#ifdef WIN32
template<class T> __attribute__((force_align_arg_pointer)) void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
#else
template<class T> void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) { template<class T> void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
#endif
if (sigma<0.25) { if (sigma<0.25) {
// dont perform filtering // dont perform filtering
if (src!=dst) if (src!=dst)

View File

@ -40,8 +40,11 @@ namespace rtengine {
extern const Settings* settings; extern const Settings* settings;
#if defined( __SSE__ ) && defined( WIN32 )
__attribute__((force_align_arg_pointer)) void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) {
#else
void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) { void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) {
#endif
const float dampingFac=-2.0/(damping*damping); const float dampingFac=-2.0/(damping*damping);