Solve bug on Windows when using OpenMP+SSE in a more elegant way (see issue 1806)
This commit is contained in:
parent
5bd68ce99a
commit
87414bc8be
@ -30,16 +30,7 @@ set (CACHE_NAME_SUFFIX "" CACHE STRING "RawTherapee's cache folder suffix (leave
|
|||||||
set (PROC_TARGET_NUMBER 0 CACHE STRING "Selected target processor from the list above (taken from ProcessorTargets.cmake)")
|
set (PROC_TARGET_NUMBER 0 CACHE STRING "Selected target processor from the list above (taken from ProcessorTargets.cmake)")
|
||||||
|
|
||||||
# The following line sets special compilation flags for RTEngine, which will be added to CMAKE_CXX_FLAGS
|
# The following line sets special compilation flags for RTEngine, which will be added to CMAKE_CXX_FLAGS
|
||||||
# Due to a bug in GCC when using OpenMP+SSE, -mstackrealign will be added to your flags
|
set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine")
|
||||||
set (RTENGINE_CXX_FLAGS "" CACHE STRING "Special compilation flags for RTEngine; -mstackrealign will be added to your flags")
|
|
||||||
|
|
||||||
# mandatory flags for rtengine for all platforms, depending on the bit depth
|
|
||||||
set (RTENGINE_CXX_FLAGS_MANDATORY "-mstackrealign")
|
|
||||||
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} -mpreferred-stack-boundary=4")
|
|
||||||
#else (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
|
||||||
# set (RTENGINE_CXX_FLAGS_MANDATORY "${RTENGINE_CXX_FLAGS_MANDATORY} ")
|
|
||||||
endif (CMAKE_SIZEOF_VOID_P EQUAL 4)
|
|
||||||
|
|
||||||
#loading the processor targets list
|
#loading the processor targets list
|
||||||
include (ProcessorTargets.cmake)
|
include (ProcessorTargets.cmake)
|
||||||
@ -281,7 +272,7 @@ else ()
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# Get c++ and linker flags for rtengine (the gui's c++ flags may have fewer flags)
|
# Get c++ and linker flags for rtengine (the gui's c++ flags may have fewer flags)
|
||||||
set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}")
|
set(CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPER_CMAKE_BUILD_TYPE}} ${RTENGINE_CXX_FLAGS}")
|
||||||
set(LFLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${UPPER_CMAKE_BUILD_TYPE}}")
|
set(LFLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${UPPER_CMAKE_BUILD_TYPE}}")
|
||||||
|
|
||||||
set(ABOUT_COMMAND_WITH_ARGS ${CMAKE_COMMAND}
|
set(ABOUT_COMMAND_WITH_ARGS ${CMAKE_COMMAND}
|
||||||
|
@ -29,7 +29,7 @@ IF (BUILD_SHARED_LIBS)
|
|||||||
install (TARGETS rtengine DESTINATION ${LIBDIR})
|
install (TARGETS rtengine DESTINATION ${LIBDIR})
|
||||||
ENDIF (BUILD_SHARED_LIBS)
|
ENDIF (BUILD_SHARED_LIBS)
|
||||||
|
|
||||||
set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS} ${RTENGINE_CXX_FLAGS_MANDATORY}")
|
set_target_properties (rtengine PROPERTIES COMPILE_FLAGS "${RTENGINE_CXX_FLAGS}")
|
||||||
|
|
||||||
target_link_libraries (rtengine rtexif ${EXTRA_LIB} ${GOBJECT_LIBRARIES} ${GTHREAD_LIBRARIES}
|
target_link_libraries (rtengine rtexif ${EXTRA_LIB} ${GOBJECT_LIBRARIES} ${GTHREAD_LIBRARIES}
|
||||||
${GLIB2_LIBRARIES} ${GLIBMM_LIBRARIES} ${LCMS_LIBRARIES} ${EXPAT_LIBRARIES} ${FFTW3F_LIBRARIES} ${IPTCDATA_LIBRARIES}
|
${GLIB2_LIBRARIES} ${GLIBMM_LIBRARIES} ${LCMS_LIBRARIES} ${EXPAT_LIBRARIES} ${FFTW3F_LIBRARIES} ${IPTCDATA_LIBRARIES}
|
||||||
|
@ -77,9 +77,12 @@ template<class T> void gaussVertical3 (T** src, T** dst, AlignedBufferMP<double>
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
|
#ifdef WIN32
|
||||||
|
template<class T> __attribute__((force_align_arg_pointer)) void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
||||||
|
#else
|
||||||
template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
||||||
|
#endif
|
||||||
__m128 Tv,Tm1v,Tp1v;
|
__m128 Tv,Tm1v,Tp1v;
|
||||||
__m128 c0v,c1v;
|
__m128 c0v,c1v;
|
||||||
c0v = _mm_set1_ps(c0);
|
c0v = _mm_set1_ps(c0);
|
||||||
@ -115,9 +118,11 @@ template<class T> void gaussVertical3Sse (T** src, T** dst, int W, int H, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
||||||
|
#else
|
||||||
template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) {
|
||||||
|
#endif
|
||||||
float tmp[W][4] __attribute__ ((aligned (16)));
|
float tmp[W][4] __attribute__ ((aligned (16)));
|
||||||
|
|
||||||
__m128 Tv,Tm1v,Tp1v;
|
__m128 Tv,Tm1v,Tp1v;
|
||||||
@ -170,8 +175,11 @@ template<class T> void gaussHorizontal3Sse (T** src, T** dst, int W, int H, cons
|
|||||||
|
|
||||||
|
|
||||||
// fast gaussian approximation if the support window is large
|
// fast gaussian approximation if the support window is large
|
||||||
|
#ifdef WIN32
|
||||||
|
template<class T> __attribute__((force_align_arg_pointer)) void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
|
||||||
|
#else
|
||||||
template<class T> void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
|
template<class T> void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma) {
|
||||||
|
#endif
|
||||||
if (sigma<0.25) {
|
if (sigma<0.25) {
|
||||||
// dont perform filtering
|
// dont perform filtering
|
||||||
if (src!=dst)
|
if (src!=dst)
|
||||||
@ -406,9 +414,12 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBufferMP<double
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
|
#ifdef WIN32
|
||||||
|
template<class T> __attribute__((force_align_arg_pointer)) void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
|
||||||
|
#else
|
||||||
template<class T> void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
|
template<class T> void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma) {
|
||||||
|
#endif
|
||||||
if (sigma<0.25) {
|
if (sigma<0.25) {
|
||||||
// dont perform filtering
|
// dont perform filtering
|
||||||
if (src!=dst)
|
if (src!=dst)
|
||||||
|
@ -40,8 +40,11 @@ namespace rtengine {
|
|||||||
|
|
||||||
|
|
||||||
extern const Settings* settings;
|
extern const Settings* settings;
|
||||||
|
#if defined( __SSE__ ) && defined( WIN32 )
|
||||||
|
__attribute__((force_align_arg_pointer)) void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) {
|
||||||
|
#else
|
||||||
void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) {
|
void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) {
|
||||||
|
#endif
|
||||||
|
|
||||||
const float dampingFac=-2.0/(damping*damping);
|
const float dampingFac=-2.0/(damping*damping);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user