diff --git a/rtengine/EdgePreservingDecomposition.cc b/rtengine/EdgePreservingDecomposition.cc index 1c2d0219f..21514978b 100644 --- a/rtengine/EdgePreservingDecomposition.cc +++ b/rtengine/EdgePreservingDecomposition.cc @@ -291,7 +291,7 @@ bool MultiDiagonalSymmetricMatrix::LazySetEntry(float value, int row, int column return true; } -SSEFUNCTION void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* RESTRICT x) +void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* RESTRICT x) { int srm = StartRows[m - 1]; @@ -693,7 +693,7 @@ EdgePreservingDecomposition::~EdgePreservingDecomposition() delete A; } -SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop) +float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop) { if(Blur == nullptr) @@ -884,7 +884,7 @@ float *EdgePreservingDecomposition::CreateIteratedBlur(float *Source, float Scal return Blur; } -SSEFUNCTION void EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings) +void EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings) { if(w < 300 && h < 300) { // set number of Reweightings to zero for small images (thumbnails). We could try to find a better solution here. Reweightings = 0; diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index aec11e59b..cfb237d82 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -474,7 +474,7 @@ void ImProcFunctions::Tile_calc(int tilesize, int overlap, int kall, int imwidth int denoiseNestedLevels = 1; enum nrquality {QUALITY_STANDARD, QUALITY_HIGH}; -SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi) +void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi) { BENCHFUN //#ifdef _DEBUG @@ -783,7 +783,7 @@ BENCHFUN numthreads = MIN(numthreads, options.rgbDenoiseThreadLimit); } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP denoiseNestedLevels = omp_get_max_threads() / numthreads; bool oldNested = omp_get_nested(); @@ -916,7 +916,7 @@ BENCHFUN if (!denoiseMethodRgb) { //lab mode //modification Jacques feb 2013 and july 2014 -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -964,7 +964,7 @@ BENCHFUN } } } else {//RGB mode -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -999,7 +999,7 @@ BENCHFUN } } } else {//image is not raw; use Lab parametrization -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -1159,7 +1159,7 @@ BENCHFUN if (!memoryAllocationFailed) { // precalculate madL, because it's used in adecomp and bdecomp int maxlvl = Ldecomp->maxlevel(); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic) collapse(2) num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -1284,7 +1284,7 @@ BENCHFUN if (!memoryAllocationFailed) { // copy labdn->L to Lin before it gets modified by reconstruction Lin = new array2D(width, height); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -1337,14 +1337,14 @@ BENCHFUN } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP int masterThread = omp_get_thread_num(); #endif -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num(); #else int subThread = 0; @@ -1354,7 +1354,7 @@ BENCHFUN float *fLblox = fLbloxArray[subThread]; float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16; float nbrwt[TS * TS] ALIGNED64; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -1454,7 +1454,7 @@ BENCHFUN } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif @@ -1591,7 +1591,7 @@ BENCHFUN realred /= 100.f; realblue /= 100.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels) #endif @@ -1641,7 +1641,7 @@ BENCHFUN } } } else {//RGB mode -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) #endif @@ -1681,7 +1681,7 @@ BENCHFUN } } else { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) #endif @@ -1748,7 +1748,7 @@ BENCHFUN } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP omp_set_nested(oldNested); #endif @@ -2043,7 +2043,7 @@ BENCHFUN //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -SSEFUNCTION void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT +void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT { int blkstart = hblproc * TS * TS; @@ -2254,7 +2254,7 @@ void ImProcFunctions::Noise_residualAB(wavelet_decomposition &WaveletCoeffs_ab, chmaxresid = maxresid; } -SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]) +bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]) { int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5); const float eps = 0.01f; @@ -2272,7 +2272,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit } bool memoryAllocationFailed = false; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { @@ -2287,7 +2287,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit if (!memoryAllocationFailed) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2387,7 +2387,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit return (!memoryAllocationFailed); } -SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, +bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb) { int maxlvl = WaveletCoeffs_L.maxlevel(); @@ -2411,7 +2411,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi } bool memoryAllocationFailed = false; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { @@ -2427,7 +2427,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi if (!memoryAllocationFailed) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2446,7 +2446,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2550,7 +2550,7 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, } bool memoryAllocationFailed = false; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { @@ -2565,7 +2565,7 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, } if (!memoryAllocationFailed) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2605,7 +2605,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L } bool memoryAllocationFailed = false; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { @@ -2619,7 +2619,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L } if (!memoryAllocationFailed) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2643,7 +2643,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -SSEFUNCTION void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, +void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge) { @@ -2733,7 +2733,7 @@ SSEFUNCTION void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeff } -SSEFUNCTION void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, +void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated) @@ -2848,7 +2848,7 @@ SSEFUNCTION void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoef } -SSEFUNCTION void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, +void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, int W_ab, int H_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb) @@ -2988,7 +2988,7 @@ void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition & } } -SSEFUNCTION void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) +void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) { gam = dnparams.gamma; gamthresh = 0.001f; @@ -3153,7 +3153,7 @@ void ImProcFunctions::calcautodn_info(float &chaut, float &delta, int Nb, int le } -SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) +void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) { if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) { //nothing to do @@ -3192,7 +3192,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat bcalc[i] = new float[wid]; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -3315,7 +3315,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat //fill tile from image; convert RGB to "luma/chroma" if (isRAW) {//image is raw; use channel differences for chroma channels -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -3368,7 +3368,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat #endif } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -3386,7 +3386,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat if (!denoiseMethodRgb) { //lab mode, modification Jacques feb 2013 and july 2014 -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -3523,17 +3523,17 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat } const int levwav = 5; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel sections if (multiThread) #endif { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp section #endif { adecomp = new wavelet_decomposition(labdn->data + datalen, labdn->W, labdn->H, levwav, 1); } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp section #endif { diff --git a/rtengine/LUT.h b/rtengine/LUT.h index 0fd906dc0..b2d11c234 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -104,7 +104,7 @@ protected: unsigned int upperBound; // always equals size-1, parameter created for performance reason private: unsigned int owner; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat maxsv ALIGNED16; vfloat sizev ALIGNED16; vint sizeiv ALIGNED16; @@ -136,7 +136,7 @@ public: maxs = size - 2; maxsf = (float)maxs; maxIndexFloat = ((float)upperBound) - 1e-5; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ maxsv = F2V( maxs ); sizeiv = _mm_set1_epi32( (int)(size - 1) ); sizev = F2V( size - 1 ); @@ -167,7 +167,7 @@ public: maxs = size - 2; maxsf = (float)maxs; maxIndexFloat = ((float)upperBound) - 1e-5; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ maxsv = F2V( maxs ); sizeiv = _mm_set1_epi32( (int)(size - 1) ); sizev = F2V( size - 1 ); @@ -178,7 +178,7 @@ public: { data = nullptr; reset(); -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ maxsv = ZEROV; sizev = ZEROV; sizeiv = _mm_setzero_si128(); @@ -243,7 +243,7 @@ public: this->maxs = this->size - 2; this->maxsf = (float)this->maxs; this->maxIndexFloat = ((float)this->upperBound) - 1e-5; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ this->maxsv = F2V( this->size - 2); this->sizeiv = _mm_set1_epi32( (int)(this->size - 1) ); this->sizev = F2V( this->size - 1 ); @@ -258,7 +258,7 @@ public: LUT & operator+=(LUT &rhs) { if (rhs.size == this->size) { -#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324 +#ifdef _OPENMP #pragma omp simd #endif @@ -274,7 +274,7 @@ public: template::value>::type> LUT & operator*=(float factor) { -#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324 +#ifdef _OPENMP #pragma omp simd #endif @@ -289,7 +289,7 @@ public: template::value>::type> LUT & operator/=(float divisor) { -#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324 +#ifdef _OPENMP #pragma omp simd #endif @@ -307,7 +307,7 @@ public: return data[ rtengine::LIM(index, 0, upperBound) ]; } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ // NOTE: This function requires LUTs which clips only at lower bound @@ -698,7 +698,7 @@ public: maxs = size - 2; maxsf = (float)maxs; maxIndexFloat = ((float)upperBound) - 1e-5; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ maxsv = F2V( size - 2); sizeiv = _mm_set1_epi32( (int)(size - 1) ); sizev = F2V( size - 1 ); diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index a31c78767..1a937b409 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -43,7 +43,7 @@ namespace rtengine { extern const Settings* settings; -SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh) +void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh) { const int halfwin = ceil(2 * radius) + 1; @@ -285,7 +285,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, free(fringe); } -SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh) +void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh) { const int halfwin = ceil(2 * radius) + 1; @@ -655,7 +655,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds free(fringe); } -SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad) +void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad) { const int halfwin = ceil(2 * radius) + 1; MyTime t1, t2; @@ -1263,7 +1263,7 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d } -SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom) +void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom) { const int halfwin = ceil(2 * radius) + 1; MyTime t1, t2; diff --git a/rtengine/amaze_demosaic_RT.cc b/rtengine/amaze_demosaic_RT.cc index 30c673813..bdb798a06 100644 --- a/rtengine/amaze_demosaic_RT.cc +++ b/rtengine/amaze_demosaic_RT.cc @@ -38,7 +38,7 @@ namespace rtengine { -SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue) +void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue) { BENCHFUN diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index 5475e8ffc..805575b77 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -121,7 +121,7 @@ template void boxblur (T** src, A** dst, int radx, int rady, i } -template SSEFUNCTION void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H) +template void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H) { //box blur image; box range = (radx,rady) @@ -309,7 +309,7 @@ template SSEFUNCTION void boxblur (T** src, A** dst, T* buffer } -template SSEFUNCTION void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H) +template void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H) { //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) @@ -496,7 +496,7 @@ template SSEFUNCTION void boxblur (T* src, A* dst, A* buffer, } -template SSEFUNCTION void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp) +template void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp) { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/rtengine/color.cc b/rtengine/color.cc index ee63720aa..68fd467e2 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -1725,7 +1725,7 @@ void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat & void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width) { -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat maxvalfv = F2V(MAXVALF); vfloat c116v = F2V(116.f); vfloat c5242d88v = F2V(5242.88f); @@ -1733,7 +1733,7 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, vfloat c200v = F2V(200.f); #endif int i = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for(;i < width - 3; i+=4) { const vfloat rv = LVFU(R[i]); const vfloat gv = LVFU(G[i]); @@ -2606,7 +2606,7 @@ void Color::gamutLchonly (float2 sincosval, float &Lprov1, float &Chprov1, const * const double wip[3][3]: matrix for working profile * bool multiThread : parallelize the loop */ -SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb, const int N, bool corMunsell, bool lumaMuns, bool isHLEnabled, bool gamut, const double wip[3][3]) +void Color::LabGamutMunsell(float *labL, float *laba, float *labb, const int N, bool corMunsell, bool lumaMuns, bool isHLEnabled, bool gamut, const double wip[3][3]) { #ifdef _DEBUG MyTime t1e, t2e; diff --git a/rtengine/cplx_wavelet_level.h b/rtengine/cplx_wavelet_level.h index 59c206f27..9996ec2af 100644 --- a/rtengine/cplx_wavelet_level.h +++ b/rtengine/cplx_wavelet_level.h @@ -244,7 +244,7 @@ template void wavelet_level::SynthesisFilterHaarHorizontal (const * Applies a Haar filter * */ -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif @@ -266,11 +266,11 @@ template void wavelet_level::SynthesisFilterHaarVertical (const T * Applies a Haar filter * */ -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for nowait #endif @@ -281,7 +281,7 @@ template void wavelet_level::SynthesisFilterHaarVertical (const T } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -328,7 +328,7 @@ void wavelet_level::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer, } #ifdef __SSE2__ -template SSEFUNCTION void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], +template void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int height, const int row) { @@ -455,7 +455,7 @@ template void wavelet_level::SynthesisFilterSubsampHorizontal (T // calculate coefficients int shift = skip * (taps - offset - 1); //align filter with data -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif @@ -506,7 +506,7 @@ template void wavelet_level::SynthesisFilterSubsampHorizontal (T } #ifdef __SSE2__ -template SSEFUNCTION void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend) +template void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend) { /* Basic convolution code @@ -521,7 +521,7 @@ template SSEFUNCTION void wavelet_level::SynthesisFilterSubsampVe __m128 fourv = _mm_set1_ps(4.f); __m128 srcFactorv = _mm_set1_ps(srcFactor); __m128 dstFactorv = _mm_set1_ps(blend); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif @@ -594,7 +594,7 @@ template void wavelet_level::SynthesisFilterSubsampVertical (T * // calculate coefficients int shift = skip * (taps - offset - 1); //align filter with data -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(numThreads) if(numThreads>1) #endif @@ -630,7 +630,7 @@ template void wavelet_level::SynthesisFilterSubsampVertical (T * #endif #ifdef __SSE2__ -template template SSEFUNCTION void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) +template template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { /* filter along rows and columns */ @@ -644,7 +644,7 @@ template template SSEFUNCTION void wavelet_level::dec } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { @@ -652,7 +652,7 @@ template template SSEFUNCTION void wavelet_level::dec T tmpHi[m_w] ALIGNED64; if(subsamp_out) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -662,7 +662,7 @@ template template SSEFUNCTION void wavelet_level::dec AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2); } } else { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -678,7 +678,7 @@ template template SSEFUNCTION void wavelet_level::dec template template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(numThreads) if(numThreads>1) #endif { @@ -687,7 +687,7 @@ template template void wavelet_level::decompose_level /* filter along rows and columns */ if(subsamp_out) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -697,7 +697,7 @@ template template void wavelet_level::decompose_level AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2); } } else { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -714,7 +714,7 @@ template template void wavelet_level::decompose_level #ifdef __SSE2__ -template template SSEFUNCTION void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) +template template void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend) { if(memoryAllocationFailed) { return; diff --git a/rtengine/curves.cc b/rtengine/curves.cc index 8d52782a2..508bd1640 100644 --- a/rtengine/curves.cc +++ b/rtengine/curves.cc @@ -476,7 +476,7 @@ void CurveFactory::complexsgnCurve (bool & autili, bool & butili, bool & ccutil } -SSEFUNCTION void CurveFactory::complexCurve (double ecomp, double black, double hlcompr, double hlcomprthresh, +void CurveFactory::complexCurve (double ecomp, double black, double hlcompr, double hlcomprthresh, double shcompr, double br, double contr, const std::vector& curvePoints, const std::vector& curvePoints2, diff --git a/rtengine/curves.h b/rtengine/curves.h index 6a37b7e46..d8e443fc9 100644 --- a/rtengine/curves.h +++ b/rtengine/curves.h @@ -828,7 +828,7 @@ class WeightedStdToneCurve : public ToneCurve { private: float Triangle(float refX, float refY, float X2) const; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat Triangle(vfloat refX, vfloat refY, vfloat X2) const; #endif public: @@ -904,7 +904,7 @@ inline void StandardToneCurve::BatchApply( // If we get to the end before getting to an aligned address, just return. // (Or, for non-SSE mode, if we get to the end.) return; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ } else if (reinterpret_cast(&r[i]) % 16 == 0) { // Otherwise, we get to the first aligned address; go to the SSE part. break; @@ -916,7 +916,7 @@ inline void StandardToneCurve::BatchApply( i++; } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for (; i + 3 < end; i += 4) { __m128 r_val = LVF(r[i]); __m128 g_val = LVF(g[i]); @@ -1007,7 +1007,7 @@ inline float WeightedStdToneCurve::Triangle(float a, float a1, float b) const return a1; } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) const { vfloat a2 = a1 - a; @@ -1061,7 +1061,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en // If we get to the end before getting to an aligned address, just return. // (Or, for non-SSE mode, if we get to the end.) return; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ } else if (reinterpret_cast(&r[i]) % 16 == 0) { // Otherwise, we get to the first aligned address; go to the SSE part. break; @@ -1071,7 +1071,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en i++; } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ const vfloat c65535v = F2V(65535.f); const vfloat zd5v = F2V(0.5f); const vfloat zd25v = F2V(0.25f); diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index 68b2ad8bb..90a202a1c 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -1314,7 +1314,7 @@ void RawImageSource::jdl_interpolate_omp() // from "Lassus" // Adapted to RawTherapee by Jacques Desmis 3/2013 // Improved speed and reduced memory consumption by Ingo Weyrich 2/2015 //TODO Tiles to reduce memory consumption -SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations) +void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations) { const int width = winw, height = winh; const int ba = 10; @@ -1946,7 +1946,7 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array // SSE version by Ingo Weyrich 5/2013 #ifdef __SSE2__ #define CLIPV(a) LIMV(a,zerov,c65535v) -SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh) +void RawImageSource::igv_interpolate(int winw, int winh) { static const float eps = 1e-5f, epssq = 1e-5f; //mod epssq -10f =>-5f Jacques 3/2013 to prevent artifact (divide by zero) @@ -2873,7 +2873,7 @@ void RawImageSource::nodemosaic(bool bw) #ifdef __SSE2__ #define CLIPV(a) LIMV(a,ZEROV,c65535v) #endif -SSEFUNCTION void RawImageSource::refinement(int PassCount) +void RawImageSource::refinement(int PassCount) { MyTime t1e, t2e; t1e.set(); @@ -3870,7 +3870,7 @@ void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b return; } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat zd5v = F2V(0.5f); vfloat c116v = F2V(116.f); vfloat c16v = F2V(16.f); @@ -3887,7 +3887,7 @@ void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b for(int i = 0; i < height; i++) { int j = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) // vectorized LUT access is restricted to __x86_64__ => we have to use the same restriction +#ifdef __SSE2__ for(; j < labWidth - 3; j += 4) { vfloat redv, greenv, bluev; diff --git a/rtengine/dirpyr_equalizer.cc b/rtengine/dirpyr_equalizer.cc index e20bc04e7..7d3ace3ff 100644 --- a/rtengine/dirpyr_equalizer.cc +++ b/rtengine/dirpyr_equalizer.cc @@ -40,7 +40,7 @@ extern const Settings* settings; //sequence of scales -SSEFUNCTION void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, const double * mult, const double dirpyrThreshold, const double skinprot, float b_l, float t_l, float t_r, int scaleprev) +void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, const double * mult, const double dirpyrThreshold, const double skinprot, float b_l, float t_l, float t_r, int scaleprev) { int lastlevel = maxlevel; @@ -380,7 +380,7 @@ void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float } } -SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale) +void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale) { // scale is spacing of directional averaging weights // calculate weights, compute directionally weighted average diff --git a/rtengine/fast_demo.cc b/rtengine/fast_demo.cc index 8d02bafb2..900ff2119 100644 --- a/rtengine/fast_demo.cc +++ b/rtengine/fast_demo.cc @@ -52,7 +52,7 @@ LUTf RawImageSource::initInvGrad() #endif //LUTf RawImageSource::invGrad = RawImageSource::initInvGrad(); -SSEFUNCTION void RawImageSource::fast_demosaic() +void RawImageSource::fast_demosaic() { double progress = 0.0; diff --git a/rtengine/gauss.cc b/rtengine/gauss.cc index 09c91db1e..78275da9f 100644 --- a/rtengine/gauss.cc +++ b/rtengine/gauss.cc @@ -229,7 +229,7 @@ template void gaussHorizontal3 (T** src, T** dst, int W, int H, const f } #ifdef __SSE2__ -template SSEFUNCTION void gaussVertical3 (T** src, T** dst, int W, int H, const float c0, const float c1) +template void gaussVertical3 (T** src, T** dst, int W, int H, const float c0, const float c1) { vfloat Tv = F2V(0.f), Tm1v, Tp1v; vfloat Tv1 = F2V(0.f), Tm1v1, Tp1v1; @@ -314,7 +314,7 @@ template void gaussVertical3 (T** src, T** dst, int W, int H, const flo #ifdef __SSE2__ // fast gaussian approximation if the support window is large -template SSEFUNCTION void gaussHorizontalSse (T** src, T** dst, const int W, const int H, const float sigma) +template void gaussHorizontalSse (T** src, T** dst, const int W, const int H, const float sigma) { double b1, b2, b3, B, M[3][3]; calculateYvVFactors(sigma, b1, b2, b3, B, M); @@ -474,7 +474,7 @@ template void gaussHorizontal (T** src, T** dst, const int W, const int } #ifdef __SSE2__ -template SSEFUNCTION void gaussVerticalSse (T** src, T** dst, const int W, const int H, const float sigma) +template void gaussVerticalSse (T** src, T** dst, const int W, const int H, const float sigma) { double b1, b2, b3, B, M[3][3]; calculateYvVFactors(sigma, b1, b2, b3, B, M); @@ -617,7 +617,7 @@ template SSEFUNCTION void gaussVerticalSse (T** src, T** dst, const int #endif #ifdef __SSE2__ -template SSEFUNCTION void gaussVerticalSsemult (T** RESTRICT src, T** RESTRICT dst, const int W, const int H, const float sigma) +template void gaussVerticalSsemult (T** RESTRICT src, T** RESTRICT dst, const int W, const int H, const float sigma) { double b1, b2, b3, B, M[3][3]; calculateYvVFactors(sigma, b1, b2, b3, B, M); @@ -758,7 +758,7 @@ template SSEFUNCTION void gaussVerticalSsemult (T** RESTRICT src, T** R } } -template SSEFUNCTION void gaussVerticalSsediv (T** RESTRICT src, T** RESTRICT dst, T** divBuffer, const int W, const int H, const float sigma) +template void gaussVerticalSsediv (T** RESTRICT src, T** RESTRICT dst, T** divBuffer, const int W, const int H, const float sigma) { double b1, b2, b3, B, M[3][3]; calculateYvVFactors(sigma, b1, b2, b3, B, M); diff --git a/rtengine/helpersse2.h b/rtengine/helpersse2.h index 7e2185a7f..46af3aa89 100644 --- a/rtengine/helpersse2.h +++ b/rtengine/helpersse2.h @@ -29,29 +29,13 @@ typedef __m128 vfloat; typedef __m128i vint2; // -#ifdef __GNUC__ -#if (!defined(WIN32) || defined( __x86_64__ )) #define LVF(x) _mm_load_ps((float*)&x) #define LVFU(x) _mm_loadu_ps(&x) #define STVF(x,y) _mm_store_ps(&x,y) #define STVFU(x,y) _mm_storeu_ps(&x,y) #define LVI(x) _mm_load_si128((__m128i*)&x) -#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds -#define LVF(x) _mm_loadu_ps((float*)&x) -#define LVFU(x) _mm_loadu_ps(&x) -#define STVF(x,y) _mm_storeu_ps(&x,y) -#define STVFU(x,y) _mm_storeu_ps(&x,y) -#define LVI(x) _mm_loadu_si128((__m128i*)&x) -#endif -#else -#define LVF(x) _mm_load_ps((float*)&x) -#define LVFU(x) _mm_loadu_ps(&x) -#define STVF(x,y) _mm_store_ps(&x,y) -#define STVFU(x,y) _mm_storeu_ps(&x,y) -#define LVI(x) _mm_load_si128((__m128i*)&x) -#endif -#if defined(__x86_64__) && defined(__AVX__) +#ifdef __AVX__ #define PERMUTEPS(a,mask) _mm_permute_ps(a,mask) #else #define PERMUTEPS(a,mask) _mm_shuffle_ps(a,a,mask) @@ -67,7 +51,7 @@ static INLINE vfloat LC2VFU(float &a) // Store a vector of 4 floats in a[0],a[2],a[4] and a[6] -#if defined(__x86_64__) && defined(__SSE4_1__) +#ifdef __SSE4_1__ // SSE4.1 => use _mm_blend_ps instead of _mm_set_epi32 and vself #define STC2VFU(a,v) {\ __m128 TST1V = _mm_loadu_ps(&a);\ diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index f9e630f9d..b8ad23e70 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -35,7 +35,7 @@ extern const Settings* settings; //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -SSEFUNCTION void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int W, int box ) +void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int W, int box ) { //box blur image channel; box size = 2*box+1 //horizontal blur diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index d0c78f906..a76d4cd84 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -53,7 +53,7 @@ using namespace rtengine; // begin of helper function for rgbProc() void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) { -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat cr = F2V(0.299f); vfloat cg = F2V(0.587f); vfloat cb = F2V(0.114f); @@ -61,7 +61,7 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float for (int i = istart, ti = 0; i < tH; i++, ti++) { int j = jstart, tj = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for (; j < tW - 3; j+=4, tj+=4) { vfloat rv = LVF(rtemp[ti * tileSize + tj]); @@ -94,14 +94,14 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) { -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat threev = F2V(3.f); vfloat maxvalfv = F2V(MAXVALF); #endif for (int i = istart, ti = 0; i < tH; i++, ti++) { int j = jstart, tj = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for (; j < tW - 3; j+=4, tj+=4) { vfloat rv = LVF(rtemp[ti * tileSize + tj]); @@ -158,7 +158,7 @@ void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH, // this is a hack to avoid the blue=>black bug (Issue 2141) for (int i = istart, ti = 0; i < tH; i++, ti++) { int j = jstart, tj = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for (; j < tW - 3; j+=4, tj+=4) { vfloat rv = LVF(rtemp[ti * tileSize + tj]); vfloat gv = LVF(gtemp[ti * tileSize + tj]); @@ -3763,7 +3763,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer } else { for (int i = istart, ti = 0; i < tH; i++, ti++) { int j = jstart, tj = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for (; j < tW - 3; j+=4, tj+=4) { //brightness/contrast STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj]))); @@ -5391,7 +5391,7 @@ void ImProcFunctions::luminanceCurve (LabImage* lold, LabImage* lnew, LUTf & cur -SSEFUNCTION void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW, LabImage* lold, LabImage* lnew, LUTf & acurve, LUTf & bcurve, LUTf & satcurve, LUTf & lhskcurve, LUTf & clcurve, LUTf & curve, bool utili, bool autili, bool butili, bool ccutili, bool cclutili, bool clcutili, LUTu &histCCurve, LUTu &histLCurve) +void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW, LabImage* lold, LabImage* lnew, LUTf & acurve, LUTf & bcurve, LUTf & satcurve, LUTf & lhskcurve, LUTf & clcurve, LUTf & curve, bool utili, bool autili, bool butili, bool ccutili, bool cclutili, bool clcutili, LUTu &histCCurve, LUTu &histLCurve) { if (!params->labCurve.enabled) { return; @@ -6939,7 +6939,7 @@ void ImProcFunctions::rgb2lab (const Imagefloat &src, LabImage &dst, const Glib: } } -SSEFUNCTION void ImProcFunctions::lab2rgb (const LabImage &src, Imagefloat &dst, const Glib::ustring &workingSpace) +void ImProcFunctions::lab2rgb (const LabImage &src, Imagefloat &dst, const Glib::ustring &workingSpace) { TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix ( workingSpace ); const float wip[3][3] = { diff --git a/rtengine/impulse_denoise.cc b/rtengine/impulse_denoise.cc index a40577f39..907afc1a1 100644 --- a/rtengine/impulse_denoise.cc +++ b/rtengine/impulse_denoise.cc @@ -31,7 +31,7 @@ using namespace std; namespace rtengine { -SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh) +void ImProcFunctions::impulse_nr (LabImage* lab, double thresh) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // impulse noise removal @@ -260,7 +260,7 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh) } -SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3]) +void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3]) { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // impulse noise removal diff --git a/rtengine/ipresize.cc b/rtengine/ipresize.cc index ba559b2a0..03502e99b 100644 --- a/rtengine/ipresize.cc +++ b/rtengine/ipresize.cc @@ -178,7 +178,7 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca } -SSEFUNCTION void ImProcFunctions::Lanczos (const LabImage* src, LabImage* dst, float scale) +void ImProcFunctions::Lanczos (const LabImage* src, LabImage* dst, float scale) { const float delta = 1.0f / scale; const float a = 3.0f; diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index 388567f8a..b90b7f295 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -34,7 +34,7 @@ namespace rtengine #define ABS(a) ((a)<0?-(a):(a)) extern const Settings* settings; -SSEFUNCTION void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) +void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H) { const float dampingFac = -2.0 / (damping * damping); diff --git a/rtengine/ipwavelet.cc b/rtengine/ipwavelet.cc index 04db1c314..f4842940e 100644 --- a/rtengine/ipwavelet.cc +++ b/rtengine/ipwavelet.cc @@ -140,7 +140,7 @@ struct cont_params { int wavNestedLevels = 1; -SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip) +void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip) { @@ -645,7 +645,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int numthreads = MIN(numthreads, maxnumberofthreadsforwavelet); } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP wavNestedLevels = omp_get_max_threads() / numthreads; bool oldNested = omp_get_nested(); @@ -720,7 +720,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int Lold = lab->L; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -784,7 +784,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -868,7 +868,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int if(!Ldecomp->memoryAllocationFailed) { float madL[8][3]; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic) collapse(2) num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -1076,7 +1076,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int bool highlight = params->toneCurve.hrenabled; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -1246,7 +1246,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int delete [] varchro; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP omp_set_nested(oldNested); #endif @@ -1276,12 +1276,12 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver float thres = 5.f;//different fom zero to take into account only data large enough max = 0.f; min = 0.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif { float lmax = 0.f, lmin = 0.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for reduction(+:averaP,averaN,countP,countN) nowait #endif @@ -1305,7 +1305,7 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp critical #endif { @@ -1335,7 +1335,7 @@ void ImProcFunctions::Sigma( float * RESTRICT DataList, int datalen, float aver float variP = 0.f, variN = 0.f; float thres = 5.f;//different fom zero to take into account only data large enough -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for reduction(+:variP,variN,countP,countN) num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -1434,7 +1434,7 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr } memcpy(Contrast, Source, n * sizeof(float)); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for #endif @@ -1445,19 +1445,19 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr return Contrast; } -SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed) +float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed) { const float eps = 0.000001f; int n = W_L * H_L; #ifdef __SSE2__ -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel #endif { __m128 epsv = _mm_set1_ps( eps ); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -1471,7 +1471,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, } #else -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for #endif @@ -1508,7 +1508,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, } #ifdef __SSE2__ -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel #endif { @@ -1516,7 +1516,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, __m128 epsv = _mm_set1_ps( eps ); __m128 DetailBoostv = _mm_set1_ps( DetailBoost ); __m128 tempv = _mm_set1_ps( temp ); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -1537,7 +1537,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, } #else -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for #endif @@ -1572,7 +1572,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp min0 = 0.0f; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for #endif @@ -1592,7 +1592,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost, WavCoeffs_L0); -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for // removed schedule(dynamic,10) #endif @@ -1624,7 +1624,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat } // max0=32768.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for #endif @@ -1649,7 +1649,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat epd2.CompressDynamicRange(WavCoeffs_L0, (float)sca / skip, edgest, Compression, DetailBoost, Iterates, rew); //Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping. -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for // removed schedule(dynamic,10) #endif @@ -1692,7 +1692,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * float min0 = FLT_MAX; if(contrast != 0.f || (cp.tonemap && cp.resena)) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for reduction(+:avedbl) num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif @@ -1700,14 +1700,14 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * avedbl += WavCoeffs_L0[i]; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif { float lminL = FLT_MAX; float lmaxL = 0.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -1722,7 +1722,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp critical #endif { @@ -1782,13 +1782,13 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * koeLi[j][i] = 0.f; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif { if(contrast != 0.f && cp.resena) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for #endif @@ -1817,18 +1817,18 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * if(cp.tonemap && cp.contmet == 1 && cp.resena) { float maxp = max0 * 256.f; float minp = min0 * 256.f; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp single #endif ContrastResid(WavCoeffs_L0, cp, W_L, H_L, maxp, minp); } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp barrier #endif if((cp.conres != 0.f || cp.conresH != 0.f) && cp.resena) { // cp.conres = 0.f and cp.comresH = 0.f means that all will be multiplied by 1.f, so we can skip this step -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for nowait #endif @@ -1890,7 +1890,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * tmC[i] = &tmCBuffer[i * W_L]; } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -1910,7 +1910,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * float aamp = 1.f + cp.eddetthrHi / 100.f; for (int lvl = 0; lvl < 4; lvl++) { -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) #endif @@ -2013,7 +2013,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * // end } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2048,7 +2048,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a, float * WavCoeffs_a0 = WaveletCoeffs_a.coeff0; float * WavCoeffs_b0 = WaveletCoeffs_b.coeff0; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif { @@ -2056,7 +2056,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a, float huebuffer[W_L] ALIGNED64; float chrbuffer[W_L] ALIGNED64; #endif // __SSE2__ -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) #endif @@ -2116,13 +2116,13 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float float * WavCoeffs_ab0 = WaveletCoeffs_ab.coeff0; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif { if(cp.chrores != 0.f && cp.resena) { // cp.chrores == 0.f means all will be multiplied by 1.f, so we can skip the processing of residual -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for nowait #endif @@ -2175,7 +2175,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float if(cp.cbena && cp.resena) {//if user select Toning and color balance -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for nowait #endif @@ -2230,7 +2230,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float } } -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -2425,7 +2425,7 @@ void ImProcFunctions::finalContAllL (float ** WavCoeffs_L, float * WavCoeffs_L0, float bsig = 0.5f - asig * mean[level]; float amean = 0.5f / mean[level]; -#ifdef _RT_NESTED_OPENMP +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic, W_L * 16) num_threads(wavNestedLevels) if(wavNestedLevels>1) #endif diff --git a/rtengine/lcp.cc b/rtengine/lcp.cc index baaa88767..a1484cdf6 100644 --- a/rtengine/lcp.cc +++ b/rtengine/lcp.cc @@ -1131,7 +1131,7 @@ void rtengine::LCPMapper::correctCA(double& x, double& y, int cx, int cy, int ch y -= cy; } -SSEFUNCTION void rtengine::LCPMapper::processVignetteLine(int width, int y, float* line) const +void rtengine::LCPMapper::processVignetteLine(int width, int y, float* line) const { // No need for swapXY, since vignette is in RAW and always before rotation float yd = ((float)y - mc.y0) * mc.rfy; @@ -1169,7 +1169,7 @@ SSEFUNCTION void rtengine::LCPMapper::processVignetteLine(int width, int y, floa } } -SSEFUNCTION void rtengine::LCPMapper::processVignetteLine3Channels(int width, int y, float* line) const +void rtengine::LCPMapper::processVignetteLine3Channels(int width, int y, float* line) const { // No need for swapXY, since vignette is in RAW and always before rotation float yd = ((float)y - mc.y0) * mc.rfy; diff --git a/rtengine/opthelper.h b/rtengine/opthelper.h index d6af9a745..ce1f620e1 100644 --- a/rtengine/opthelper.h +++ b/rtengine/opthelper.h @@ -26,44 +26,14 @@ #ifdef __SSE2__ #include "sleefsseavx.c" - #ifdef __GNUC__ - #if defined(WIN32) && !defined( __x86_64__ ) - // needed for actual versions of GCC with 32-Bit Windows - #define SSEFUNCTION __attribute__((force_align_arg_pointer)) - #else - #define SSEFUNCTION - #endif - #else - #define SSEFUNCTION - #endif - #else - #ifdef __SSE__ - #ifdef __GNUC__ - #if defined(WIN32) && !defined( __x86_64__ ) - // needed for actual versions of GCC with 32-Bit Windows - #define SSEFUNCTION __attribute__((force_align_arg_pointer)) - #else - #define SSEFUNCTION - #endif - #else - #define SSEFUNCTION - #endif - #else - #define SSEFUNCTION - #endif #endif #ifdef __GNUC__ #define RESTRICT __restrict__ #define LIKELY(x) __builtin_expect (!!(x), 1) #define UNLIKELY(x) __builtin_expect (!!(x), 0) - #if (!defined(WIN32) || defined( __x86_64__ )) - #define ALIGNED64 __attribute__ ((aligned (64))) - #define ALIGNED16 __attribute__ ((aligned (16))) - #else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also needed for WIN32 builds - #define ALIGNED64 - #define ALIGNED16 - #endif + #define ALIGNED64 __attribute__ ((aligned (64))) + #define ALIGNED16 __attribute__ ((aligned (16))) #else #define RESTRICT #define LIKELY(x) (x) @@ -71,7 +41,4 @@ #define ALIGNED64 #define ALIGNED16 #endif - #if defined _OPENMP - #define _RT_NESTED_OPENMP - #endif #endif diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index ea7b0e559..04066d49f 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -1341,7 +1341,7 @@ int RawImageSource::interpolateBadPixelsXtrans( PixelsMap &bitmapBads ) * (Taken from Emil Martinec idea) * (Optimized by Ingo Weyrich 2013 and 2015) */ -SSEFUNCTION int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thresh, bool findHotPixels, bool findDeadPixels ) +int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thresh, bool findHotPixels, bool findDeadPixels ) { float varthresh = (20.0 * (thresh / 100.0) + 1.0 ) / 24.f; @@ -3169,7 +3169,7 @@ void RawImageSource::copyOriginalPixels(const RAWParams &raw, RawImage *src, Raw } } -SSEFUNCTION void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, const int boxH, const int boxW) +void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, const int boxH, const int boxW) { if(boxW == 0 && boxH == 0) { // nothing to blur diff --git a/rtengine/shmap.cc b/rtengine/shmap.cc index 26674c906..b34aac7af 100644 --- a/rtengine/shmap.cc +++ b/rtengine/shmap.cc @@ -358,7 +358,7 @@ void SHMap::forceStat (float max_, float min_, float avg_) avg = avg_; } -SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, int height, LUTf & rangefn, int level, int scale) +void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, int height, LUTf & rangefn, int level, int scale) { //scale is spacing of directional averaging weights @@ -375,7 +375,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i #pragma omp parallel #endif { -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat dirwtv, valv, normv, dftemp1v, dftemp2v; #endif // __SSE2__ int j; @@ -402,7 +402,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i data_coarse[i][j] = val / norm; // low pass filter } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ int inbrMin = max(i - scalewin, i % scale); for(; j < (width - scalewin) - 3; j += 4) { @@ -482,7 +482,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i #pragma omp parallel #endif { -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ vfloat dirwtv, valv, normv, dftemp1v, dftemp2v; float domkerv[5][5][4] ALIGNED16 = {{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}}; @@ -510,7 +510,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i data_coarse[i][j] = val / norm; // low pass filter } -#if defined( __SSE2__ ) && defined( __x86_64__ ) +#ifdef __SSE2__ for(; j < width - scalewin - 3; j += 4) { valv = _mm_setzero_ps(); diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index dcea09e2b..706d5585d 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -906,7 +906,7 @@ typedef struct { static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); } static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); } -#if defined( __SSE4_1__ ) && defined( __x86_64__ ) +#ifdef __SSE4_1__ // only one instruction when using SSE4.1 static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) { return _mm_blendv_ps(y,x,(vfloat)mask); diff --git a/rtgui/bqentryupdater.cc b/rtgui/bqentryupdater.cc index 1bb7e4257..f5adf56f9 100644 --- a/rtgui/bqentryupdater.cc +++ b/rtgui/bqentryupdater.cc @@ -73,14 +73,8 @@ void BatchQueueEntryUpdater::process (guint8* oimg, int ow, int oh, int newh, BQ stopped = false; tostop = false; -#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && defined( WIN32 ) && defined(__x86_64__) -#undef THREAD_PRIORITY_NORMAL - // See Issue 2384 comment #3 - thread = Glib::Thread::create(sigc::mem_fun(*this, &BatchQueueEntryUpdater::processThread), (unsigned long int)0, true, true, Glib::THREAD_PRIORITY_NORMAL); -#else #undef THREAD_PRIORITY_LOW thread = Glib::Thread::create(sigc::mem_fun(*this, &BatchQueueEntryUpdater::processThread), (unsigned long int)0, true, true, Glib::THREAD_PRIORITY_LOW); -#endif } } diff --git a/rtgui/histogrampanel.cc b/rtgui/histogrampanel.cc index 002c4608b..51e288951 100644 --- a/rtgui/histogrampanel.cc +++ b/rtgui/histogrampanel.cc @@ -944,7 +944,7 @@ void HistogramArea::update (LUTu &histRed, LUTu &histGreen, LUTu &histBlue, LUTu idle_register.add(func, haih); } -SSEFUNCTION void HistogramArea::updateBackBuffer () +void HistogramArea::updateBackBuffer () { if (!get_realized ()) {