From 6bebc19f02f23c5c6af98e44d807ae4c7f9dfee0 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 26 Sep 2019 15:03:09 +0200 Subject: [PATCH] reviewed boxblur code and usage --- rtengine/CMakeLists.txt | 1 + rtengine/FTblockDN.cc | 212 +++----- rtengine/boxblur.cc | 420 +++++++++++++++ rtengine/boxblur.h | 872 +----------------------------- rtengine/gauss.cc | 12 +- rtengine/gauss.h | 2 +- rtengine/guidedfilter.cc | 2 +- rtengine/improcfun.h | 20 +- rtengine/ipretinex.cc | 1044 +++++++++++++++++------------------- rtengine/ipsharpen.cc | 4 +- rtengine/ipwavelet.cc | 1 - rtengine/rawimagesource.cc | 4 +- rtengine/rawimagesource.h | 2 +- rtengine/rescale.h | 1 + rtengine/shmap.cc | 13 +- 15 files changed, 1015 insertions(+), 1595 deletions(-) create mode 100644 rtengine/boxblur.cc diff --git a/rtengine/CMakeLists.txt b/rtengine/CMakeLists.txt index f52cfa256..add2e51c0 100644 --- a/rtengine/CMakeLists.txt +++ b/rtengine/CMakeLists.txt @@ -31,6 +31,7 @@ set(CAMCONSTSFILE "camconst.json") set(RTENGINESOURCEFILES badpixels.cc + boxblur.cc CA_correct_RT.cc capturesharpening.cc EdgePreservingDecomposition.cc diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index d1e659114..8fd0ba29e 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -479,6 +479,7 @@ enum nrquality {QUALITY_STANDARD, QUALITY_HIGH}; void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi) { BENCHFUN + //#ifdef _DEBUG MyTime t1e, t2e; t1e.set(); @@ -687,8 +688,8 @@ BENCHFUN } } - int tilesize; - int overlap; + int tilesize = 0; + int overlap = 0; if (settings->leveldnti == 0) { tilesize = 1024; @@ -1341,8 +1342,6 @@ BENCHFUN #ifdef _OPENMP int masterThread = omp_get_thread_num(); -#endif -#ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) #endif { @@ -1351,11 +1350,9 @@ BENCHFUN #else int subThread = 0; #endif - float blurbuffer[TS * TS] ALIGNED64; float *Lblox = LbloxArray[subThread]; float *fLblox = fLbloxArray[subThread]; float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16; - float nbrwt[TS * TS] ALIGNED64; #ifdef _OPENMP #pragma omp for #endif @@ -1430,7 +1427,7 @@ BENCHFUN for (int hblk = 0; hblk < numblox_W; ++hblk) { - RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer); + RGBtile_denoise(fLblox, hblk, noisevar_Ldetail); }//end of horizontal block loop //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -1447,14 +1444,8 @@ BENCHFUN //add row of blocks to output image tile RGBoutput_tile_row(Lblox, Ldetail, tilemask_out, height, width, topproc); - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - }//end of vertical block loop - - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - } - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1) @@ -2041,26 +2032,20 @@ BENCHFUN }//end of main RGB_denoise - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT +void ImProcFunctions::RGBtile_denoise(float* fLblox, int hblproc, float noisevar_Ldetail) //for DCT { - int blkstart = hblproc * TS * TS; + float nbrwt[TS * TS] ALIGNED64; + const int blkstart = hblproc * TS * TS; - boxabsblur(fLblox + blkstart, nbrwt, 3, 3, TS, TS, blurbuffer); //blur neighbor weights for more robust estimation //for DCT + boxabsblur(fLblox + blkstart, nbrwt, 3, TS, TS, false); //blur neighbor weights for more robust estimation //for DCT #ifdef __SSE2__ - __m128 tempv; - __m128 noisevar_Ldetailv = _mm_set1_ps(noisevar_Ldetail); - __m128 onev = _mm_set1_ps(1.0f); + const vfloat noisevar_Ldetailv = F2V(-1.f / noisevar_Ldetail); + const vfloat onev = F2V(1.f); for (int n = 0; n < TS * TS; n += 4) { //for DCT - tempv = onev - xexpf(-SQRV(LVF(nbrwt[n])) / noisevar_Ldetailv); - _mm_storeu_ps(&fLblox[blkstart + n], LVFU(fLblox[blkstart + n]) * tempv); + const vfloat tempv = onev - xexpf(SQRV(LVF(nbrwt[n])) * noisevar_Ldetailv); + STVF(fLblox[blkstart + n], LVF(fLblox[blkstart + n]) * tempv); }//output neighbor averaged result #else @@ -2071,14 +2056,7 @@ void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noiseva #endif - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - //printf("vblk=%d hlk=%d wsqave=%f || ",vblproc,hblproc,wsqave); - -}//end of function tile_denoise - - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +} void ImProcFunctions::RGBoutput_tile_row(float *bloxrow_L, float ** Ldetail, float ** tilemask_out, int height, int width, int top) { @@ -2207,7 +2185,7 @@ void ImProcFunctions::Noise_residualAB(const wavelet_decomposition &WaveletCoeff chmaxresid = maxresid; } -bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]) +bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]) { int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5); const float eps = 0.01f; @@ -2258,23 +2236,22 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet //simple wavelet shrinkage float * sfave = buffer[0] + 32; float * sfaved = buffer[2] + 96; - float * blurBuffer = buffer[1] + 64; float mad_Lr = madL[lvl][dir - 1]; float levelFactor = mad_Lr * 5.f / (lvl + 1); #ifdef __SSE2__ - __m128 mad_Lv; - __m128 ninev = _mm_set1_ps(9.0f); - __m128 epsv = _mm_set1_ps(eps); - __m128 mag_Lv; - __m128 levelFactorv = _mm_set1_ps(levelFactor); + vfloat mad_Lv; + vfloat ninev = F2V(9.0f); + vfloat epsv = F2V(eps); + vfloat mag_Lv; + vfloat levelFactorv = F2V(levelFactor); int coeffloc_L; for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) { mad_Lv = LVFU(noisevarlum[coeffloc_L]) * levelFactorv; mag_Lv = SQRV(LVFU(WavCoeffs_L[dir][coeffloc_L])); - _mm_storeu_ps(&sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv)); + STVFU(sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv)); } for (; coeffloc_L < Hlvl_L * Wlvl_L; ++coeffloc_L) { @@ -2294,15 +2271,15 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet } #endif - boxblur(sfave, sfaved, blurBuffer, lvl + 2, lvl + 2, Wlvl_L, Hlvl_L); //increase smoothness by locally averaging shrinkage + boxblur(sfave, sfaved, lvl + 2, Wlvl_L, Hlvl_L, false); //increase smoothness by locally averaging shrinkage #ifdef __SSE2__ - __m128 sfavev; - __m128 sf_Lv; + vfloat sfavev; + vfloat sf_Lv; for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) { sfavev = LVFU(sfaved[coeffloc_L]); sf_Lv = LVFU(sfave[coeffloc_L]); - _mm_storeu_ps(&WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv)); + STVFU(WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv)); //use smoothed shrinkage unless local shrinkage is much less } @@ -2340,7 +2317,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet return (!memoryAllocationFailed); } -bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, +bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb) { int maxlvl = WaveletCoeffs_L.maxlevel(); @@ -2422,12 +2399,12 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele if (noisevar_ab > 0.001f) { #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); - __m128 mad_abrv = _mm_set1_ps(mad_abr); - __m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f); - __m128 mad_abv; - __m128 mag_Lv, mag_abv; - __m128 tempabv; + vfloat onev = F2V(1.f); + vfloat mad_abrv = F2V(mad_abr); + vfloat rmad_Lm9v = onev / F2V(mad_Lr * 9.f); + vfloat mad_abv; + vfloat mag_Lv, mag_abv; + vfloat tempabv; int coeffloc_ab; for (coeffloc_ab = 0; coeffloc_ab < Hlvl_ab * Wlvl_ab - 3; coeffloc_ab += 4) { @@ -2437,7 +2414,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]); mag_abv = SQRV(tempabv); mag_Lv = SQRV(mag_Lv) * rmad_Lm9v; - _mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))))); + STVFU(WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))))); } // few remaining pixels @@ -2470,9 +2447,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele } for (int i = 2; i >= 0; i--) { - if (buffer[i] != nullptr) { - delete[] buffer[i]; - } + delete[] buffer[i]; } } @@ -2480,7 +2455,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele } -bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD +bool ImProcFunctions::WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD { @@ -2530,16 +2505,14 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, } for (int i = 3; i >= 0; i--) { - if (buffer[i] != nullptr) { - delete[] buffer[i]; - } + delete[] buffer[i]; } } return (!memoryAllocationFailed); } -bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, +bool ImProcFunctions::WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)//mod JD { @@ -2596,7 +2569,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, +void ImProcFunctions::ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge) { @@ -2607,12 +2580,12 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float * float * sfaved = buffer[1] + 64; float * blurBuffer = buffer[2] + 96; - int W_L = WaveletCoeffs_L.level_W(level); - int H_L = WaveletCoeffs_L.level_H(level); + const int W_L = WaveletCoeffs_L.level_W(level); + const int H_L = WaveletCoeffs_L.level_H(level); float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(level); -// printf("OK lev=%d\n",level); - float mad_L = madL[dir - 1] ; + const float mad_L = madL[dir - 1] ; + const float levelFactor = mad_L * 5.f / static_cast(level + 1); if (edge == 1 && vari) { noisevarlum = blurBuffer; // we need one buffer, but fortunately we don't have to allocate a new one because we can use blurBuffer @@ -2622,71 +2595,45 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float * } } - float levelFactor = mad_L * 5.f / static_cast(level + 1); + int i = 0; #ifdef __SSE2__ - __m128 magv; - __m128 levelFactorv = _mm_set1_ps(levelFactor); - __m128 mad_Lv; - __m128 ninev = _mm_set1_ps(9.0f); - __m128 epsv = _mm_set1_ps(eps); - int i; + const vfloat levelFactorv = F2V(levelFactor); + const vfloat ninev = F2V(9.f); + const vfloat epsv = F2V(eps); - for (i = 0; i < W_L * H_L - 3; i += 4) { - mad_Lv = LVFU(noisevarlum[i]) * levelFactorv; - magv = SQRV(LVFU(WavCoeffs_L[dir][i])); - _mm_storeu_ps(&sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv)); + for (; i < W_L * H_L - 3; i += 4) { + const vfloat mad_Lv = LVFU(noisevarlum[i]) * levelFactorv; + const vfloat magv = SQRV(LVFU(WavCoeffs_L[dir][i])); + STVFU(sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv)); } - +#endif // few remaining pixels for (; i < W_L * H_L; ++i) { - float mag = SQR(WavCoeffs_L[dir][i]); + const float mag = SQR(WavCoeffs_L[dir][i]); sfave[i] = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps); } -#else - - for (int i = 0; i < W_L * H_L; ++i) { - - float mag = SQR(WavCoeffs_L[dir][i]); - float shrinkfactor = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps); - sfave[i] = shrinkfactor; - } - -#endif - boxblur(sfave, sfaved, blurBuffer, level + 2, level + 2, W_L, H_L); //increase smoothness by locally averaging shrinkage + boxblur(sfave, sfaved, level + 2, W_L, H_L, false); //increase smoothness by locally averaging shrinkage + i = 0; #ifdef __SSE2__ - __m128 sfv; - for (i = 0; i < W_L * H_L - 3; i += 4) { - sfv = LVFU(sfave[i]); + for (; i < W_L * H_L - 3; i += 4) { + const vfloat sfv = LVFU(sfave[i]); //use smoothed shrinkage unless local shrinkage is much less - _mm_storeu_ps(&WavCoeffs_L[dir][i], _mm_loadu_ps(&WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv)); + STVFU(WavCoeffs_L[dir][i], LVFU(WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv)); } - +#endif // few remaining pixels for (; i < W_L * H_L; ++i) { - float sf = sfave[i]; - + const float sf = sfave[i]; //use smoothed shrinkage unless local shrinkage is much less WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps); }//now luminance coefficients are denoised - -#else - - for (int i = 0; i < W_L * H_L; ++i) { - float sf = sfave[i]; - - //use smoothed shrinkage unless local shrinkage is much less - WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps); - - }//now luminance coefficients are denoised - -#endif } -void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, +void ImProcFunctions::ShrinkAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated) @@ -2700,7 +2647,6 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele float * sfaveab = buffer[0] + 32; float * sfaveabd = buffer[1] + 64; - float * blurBuffer = buffer[2] + 96; int W_ab = WaveletCoeffs_ab.level_W(level); int H_ab = WaveletCoeffs_ab.level_H(level); @@ -2724,12 +2670,12 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele if (noisevar_ab > 0.001f) { madab = useNoiseCCurve ? madab : madab * noisevar_ab; #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); - __m128 mad_abrv = _mm_set1_ps(madab); + vfloat onev = F2V(1.f); + vfloat mad_abrv = F2V(madab); - __m128 rmadLm9v = onev / _mm_set1_ps(mad_L * 9.f); - __m128 mad_abv ; - __m128 mag_Lv, mag_abv; + vfloat rmadLm9v = onev / F2V(mad_L * 9.f); + vfloat mad_abv ; + vfloat mag_Lv, mag_abv; int coeffloc_ab; for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) { @@ -2738,7 +2684,7 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]); mag_abv = SQRV(LVFU(WavCoeffs_ab[dir][coeffloc_ab])); mag_Lv = (SQRV(mag_Lv)) * rmadLm9v; - _mm_storeu_ps(&sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))); + STVFU(sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))); } // few remaining pixels @@ -2761,18 +2707,18 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele #endif - boxblur(sfaveab, sfaveabd, blurBuffer, level + 2, level + 2, W_ab, H_ab); //increase smoothness by locally averaging shrinkage + boxblur(sfaveab, sfaveabd, level + 2, W_ab, H_ab, false); //increase smoothness by locally averaging shrinkage #ifdef __SSE2__ - __m128 epsv = _mm_set1_ps(eps); - __m128 sfabv; - __m128 sfaveabv; + vfloat epsv = F2V(eps); + vfloat sfabv; + vfloat sfaveabv; for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) { sfabv = LVFU(sfaveab[coeffloc_ab]); sfaveabv = LVFU(sfaveabd[coeffloc_ab]); //use smoothed shrinkage unless local shrinkage is much less - _mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv)); + STVFU(WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv)); } // few remaining pixels @@ -2919,8 +2865,8 @@ void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, } -void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, +void ImProcFunctions::WaveletDenoiseAll_info(int levwav, const wavelet_decomposition &WaveletCoeffs_a, + const wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb) { @@ -3106,7 +3052,7 @@ void ImProcFunctions::calcautodn_info(float &chaut, float &delta, int Nb, int le } -void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) +void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) { if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) { //nothing to do @@ -3173,8 +3119,8 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const float gain = pow(2.0f, float(expcomp)); - int tilesize; - int overlap; + int tilesize = 0; + int overlap = 0; if (settings->leveldnti == 0) { tilesize = 1024; @@ -3275,16 +3221,16 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, for (int i = tiletop; i < tilebottom; i += 2) { int i1 = i - tiletop; #ifdef __SSE2__ - __m128 aNv, bNv; - __m128 c100v = _mm_set1_ps(100.f); + vfloat aNv, bNv; + vfloat c100v = F2V(100.f); int j; for (j = tileleft; j < tileright - 7; j += 8) { int j1 = j - tileleft; aNv = LVFU(acalc[i >> 1][j >> 1]); bNv = LVFU(bcalc[i >> 1][j >> 1]); - _mm_storeu_ps(&noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv)); - _mm_storeu_ps(&noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v)); + STVFU(noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv)); + STVFU(noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v)); } for (; j < tileright; j += 2) { diff --git a/rtengine/boxblur.cc b/rtengine/boxblur.cc new file mode 100644 index 000000000..491ffae14 --- /dev/null +++ b/rtengine/boxblur.cc @@ -0,0 +1,420 @@ +/* + * This file is part of RawTherapee. + * + * Copyright (C) 2010 Emil Martinec + * Copyright (C) 2019 Ingo Weyrich + * + * RawTherapee is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * RawTherapee is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with RawTherapee. If not, see . +*/ + +#include +#include + +#include "boxblur.h" + +#include "rt_math.h" +#include "opthelper.h" + +namespace rtengine +{ + +void boxblur(float** src, float** dst, int radius, int W, int H, bool multiThread) +{ + //box blur using rowbuffers and linebuffers instead of a full size buffer + + if (radius == 0) { + if (src != dst) { +#ifdef _OPENMP + #pragma omp parallel for if (multiThread) +#endif + + for (int row = 0; row < H; ++row) { + for (int col = 0; col < W; ++col) { + dst[row][col] = src[row][col]; + } + } + } + return; + } + + constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache +#ifdef _OPENMP + #pragma omp parallel if (multiThread) +#endif + { + std::unique_ptr buffer(new float[numCols * (radius + 1)]); + + //horizontal blur + float* const lineBuffer = buffer.get(); +#ifdef _OPENMP + #pragma omp for +#endif + for (int row = 0; row < H; ++row) { + float len = radius + 1; + float tempval = src[row][0]; + lineBuffer[0] = tempval; + for (int j = 1; j <= radius; j++) { + tempval += src[row][j]; + } + + tempval /= len; + dst[row][0] = tempval; + + for (int col = 1; col <= radius; ++col) { + lineBuffer[col] = src[row][col]; + tempval = (tempval * len + src[row][col + radius]) / (len + 1); + dst[row][col] = tempval; + ++len; + } + int pos = 0; + for (int col = radius + 1; col < W - radius; ++col) { + const float oldVal = lineBuffer[pos]; + lineBuffer[pos] = src[row][col]; + tempval = tempval + (src[row][col + radius] - oldVal) / len; + dst[row][col] = tempval; + ++pos; + pos = pos <= radius ? pos : 0; + } + + for (int col = W - radius; col < W; ++col) { + tempval = (tempval * len - lineBuffer[pos]) / (len - 1); + dst[row][col] = tempval; + --len; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + + //vertical blur +#ifdef __SSE2__ + vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get(); + const vfloat leninitv = F2V(radius + 1); + const vfloat onev = F2V(1.f); + vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; + +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - 7; col += 8) { + lenv = leninitv; + tempv = LVFU(dst[0][col]); + temp1v = LVFU(dst[0][col + 4]); + rowBuffer[0][0] = tempv; + rowBuffer[0][1] = temp1v; + + for (int i = 1; i <= radius; ++i) { + tempv = tempv + LVFU(dst[i][col]); + temp1v = temp1v + LVFU(dst[i][col + 4]); + } + + tempv = tempv / lenv; + temp1v = temp1v / lenv; + STVFU(dst[0][col], tempv); + STVFU(dst[0][col + 4], temp1v); + + for (int row = 1; row <= radius; ++row) { + rowBuffer[row][0] = LVFU(dst[row][col]); + rowBuffer[row][1] = LVFU(dst[row][col + 4]); + lenp1v = lenv + onev; + tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v; + temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenp1v; + } + + rlenv = onev / lenv; + int pos = 0; + for (int row = radius + 1; row < H - radius; ++row) { + vfloat oldVal0 = rowBuffer[pos][0]; + vfloat oldVal1 = rowBuffer[pos][1]; + rowBuffer[pos][0] = LVFU(dst[row][col]); + rowBuffer[pos][1] = LVFU(dst[row][col + 4]); + tempv = tempv + (LVFU(dst[row + radius][col]) - oldVal0) * rlenv ; + temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - oldVal1) * rlenv ; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + ++pos; + pos = pos <= radius ? pos : 0; + } + + for (int row = H - radius; row < H; ++row) { + lenm1v = lenv - onev; + tempv = (tempv * lenv - rowBuffer[pos][0]) / lenm1v; + temp1v = (temp1v * lenv - rowBuffer[pos][1]) / lenm1v; + STVFU(dst[row][col], tempv); + STVFU(dst[row][col + 4], temp1v); + lenv = lenm1v; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + +#else + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int col = 0; col < W - numCols + 1; col += 8) { + float len = radius + 1; + + for (int k = 0; k < numCols; ++k) { + rowBuffer[0][k] = dst[0][col + k]; + } + + for (int i = 1; i <= radius; ++i) { + for (int k = 0; k < numCols; ++k) { + dst[0][col + k] += dst[i][col + k]; + } + } + + for(int k = 0; k < numCols; ++k) { + dst[0][col + k] /= len; + } + + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < numCols; ++k) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + + len ++; + } + + int pos = 0; + for (int row = radius + 1; row < H - radius; ++row) { + for(int k = 0; k < numCols; ++k) { + float oldVal = rowBuffer[pos][k]; + rowBuffer[pos][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) / len; + } + ++pos; + pos = pos <= radius ? pos : 0; + } + + for (int row = H - radius; row < H; ++row) { + for(int k = 0; k < numCols; ++k) { + dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1); + } + len --; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + +#endif + //vertical blur, remaining columns +#ifdef _OPENMP + #pragma omp single +#endif + { + const int remaining = W % numCols; + + if (remaining > 0) { + float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); + const int col = W - remaining; + + float len = radius + 1; + for(int k = 0; k < remaining; ++k) { + rowBuffer[0][k] = dst[0][col + k]; + } + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { + dst[0][col + k] += dst[row][col + k]; + } + } + for(int k = 0; k < remaining; ++k) { + dst[0][col + k] /= len; + } + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < remaining; ++k) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + len ++; + } + const float rlen = 1.f / len; + int pos = 0; + for (int row = radius + 1; row < H - radius; ++row) { + for(int k = 0; k < remaining; ++k) { + float oldVal = rowBuffer[pos][k]; + rowBuffer[pos][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen; + } + ++pos; + pos = pos <= radius ? pos : 0; + } + for (int row = H - radius; row < H; ++row) { + for(int k = 0; k < remaining; ++k) { + dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[pos][k]) / (len - 1); + } + len --; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + } + } +} + +void boxabsblur(float** src, float** dst, int radius, int W, int H, bool multiThread) +{ + //abs box blur using rowbuffers and linebuffers instead of a full size buffer, W should be a multiple of 16 + + if (radius == 0) { + if (src != dst) { +#ifdef _OPENMP + #pragma omp parallel for if (multiThread) +#endif + + for (int row = 0; row < H; ++row) { + for (int col = 0; col < W; ++col) { + dst[row][col] = std::fabs(src[row][col]); + } + } + } + return; + } + + constexpr int numCols = 16; // process numCols columns at once for better usage of L1 cpu cache +#ifdef _OPENMP + #pragma omp parallel if (multiThread) +#endif + { + float buffer[numCols * (radius + 1)] ALIGNED64; + + //horizontal blur + float* const lineBuffer = buffer; +#ifdef _OPENMP + #pragma omp for +#endif + for (int row = 0; row < H; ++row) { + float len = radius + 1; + float tempval = std::fabs(src[row][0]); + lineBuffer[0] = tempval; + for (int j = 1; j <= radius; j++) { + tempval += std::fabs(src[row][j]); + } + + tempval /= len; + dst[row][0] = tempval; + + for (int col = 1; col <= radius; ++col) { + lineBuffer[col] = std::fabs(src[row][col]); + tempval = (tempval * len + std::fabs(src[row][col + radius])) / (len + 1); + dst[row][col] = tempval; + ++len; + } + + const float rlen = 1.f / len; + int pos = 0; + for (int col = radius + 1; col < W - radius; ++col) { + const float oldVal = lineBuffer[pos]; + lineBuffer[pos] = std::fabs(src[row][col]); + tempval = tempval + (std::fabs(src[row][col + radius]) - oldVal) * rlen; + dst[row][col] = tempval; + ++pos; + pos = pos <= radius ? pos : 0; + } + + for (int col = W - radius; col < W; ++col) { + tempval = (tempval * len - lineBuffer[pos]) / (len - 1); + dst[row][col] = tempval; + --len; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + + //vertical blur + float (* const rowBuffer)[numCols] = (float(*)[numCols]) buffer; +#ifdef _OPENMP + #pragma omp for +#endif + + for (int col = 0; col < W; col += numCols) { + float len = radius + 1; + + for (int k = 0; k < numCols; ++k) { + rowBuffer[0][k] = dst[0][col + k]; + } + + for (int i = 1; i <= radius; ++i) { + for (int k = 0; k < numCols; ++k) { + dst[0][col + k] += dst[i][col + k]; + } + } + + for(int k = 0; k < numCols; ++k) { + dst[0][col + k] /= len; + } + + for (int row = 1; row <= radius; ++row) { + for(int k = 0; k < numCols; ++k) { + rowBuffer[row][k] = dst[row][col + k]; + dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); + } + + ++len; + } + + const float rlen = 1.f / len; + int pos = 0; + for (int row = radius + 1; row < H - radius; ++row) { + for(int k = 0; k < numCols; ++k) { + float oldVal = rowBuffer[pos][k]; + rowBuffer[pos][k] = dst[row][col + k]; + dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen; + } + ++pos; + pos = pos <= radius ? pos : 0; + } + + for (int row = H - radius; row < H; ++row) { + for(int k = 0; k < numCols; ++k) { + dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1); + } + --len; + ++pos; + pos = pos <= radius ? pos : 0; + } + } + } +} + +void boxblur(float* src, float* dst, int radius, int W, int H, bool multiThread) +{ + float* srcp[H]; + float* dstp[H]; + for (int i = 0; i < H; ++i) { + srcp[i] = src + i * W; + dstp[i] = dst + i * W; + } + boxblur(srcp, dstp, radius, W, H, multiThread); +} + +void boxabsblur(float* src, float* dst, int radius, int W, int H, bool multiThread) +{ + float* srcp[H]; + float* dstp[H]; + for (int i = 0; i < H; ++i) { + srcp[i] = src + i * W; + dstp[i] = dst + i * W; + } + boxabsblur(srcp, dstp, radius, W, H, multiThread); +} + +} diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index 1689c5ed1..57fba9119 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -1,7 +1,7 @@ /* * This file is part of RawTherapee. * - * Copyright (C) 2010 Emil Martinec + * Copyright (C) 2019 Ingo Weyrich * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,873 +15,15 @@ * * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . - */ -#ifndef _BOXBLUR_H_ -#define _BOXBLUR_H_ - -#include -#include -#include -#include -#include -#include "alignedbuffer.h" -#include "rt_math.h" -#include "opthelper.h" +*/ +#pragma once namespace rtengine { -// classical filtering if the support window is small: - -template void boxblur (T** src, A** dst, int radx, int rady, int W, int H) -{ - //box blur image; box range = (radx,rady) - assert(2*radx+1 < W); - assert(2*rady+1 < H); - - AlignedBuffer* buffer = new AlignedBuffer (W * H); - float* temp = buffer->data; - - if (radx == 0) { -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - temp[row * W + col] = (float)src[row][col]; - } - } else { - //horizontal blur -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int row = 0; row < H; row++) { - int len = radx + 1; - temp[row * W + 0] = (float)src[row][0] / len; - - for (int j = 1; j <= radx; j++) { - temp[row * W + 0] += (float)src[row][j] / len; - } - - for (int col = 1; col <= radx; col++) { - temp[row * W + col] = (temp[row * W + col - 1] * len + (float)src[row][col + radx]) / (len + 1); - len ++; - } - - for (int col = radx + 1; col < W - radx; col++) { - temp[row * W + col] = temp[row * W + col - 1] + ((float)(src[row][col + radx] - src[row][col - radx - 1])) / len; - } - - for (int col = W - radx; col < W; col++) { - temp[row * W + col] = (temp[row * W + col - 1] * len - src[row][col - radx - 1]) / (len - 1); - len --; - } - } - } - - if (rady == 0) { -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - dst[row][col] = temp[row * W + col]; - } - } else { - //vertical blur -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int col = 0; col < W; col++) { - int len = rady + 1; - dst[0][col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0][col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row][col] = dst[(row - 1)][col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - } - - delete buffer; +void boxblur(float** src, float** dst, int radius, int W, int H, bool multiThread); +void boxblur(float* src, float* dst, int radius, int W, int H, bool multiThread); +void boxabsblur(float** src, float** dst, int radius, int W, int H, bool multiThread); +void boxabsblur(float* src, float* dst, int radius, int W, int H, bool multiThread); } - -template void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H) -{ - //box blur image; box range = (radx,rady) - - float* temp = buffer; - - if (radx == 0) { -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - temp[row * W + col] = (float)src[row][col]; - } - } else { - //horizontal blur -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 0; row < H; row++) { - float len = radx + 1; - float tempval = (float)src[row][0]; - - for (int j = 1; j <= radx; j++) { - tempval += (float)src[row][j]; - } - - tempval /= len; - temp[row * W + 0] = tempval; - - for (int col = 1; col <= radx; col++) { - temp[row * W + col] = tempval = (tempval * len + (float)src[row][col + radx]) / (len + 1); - len ++; - } - - for (int col = radx + 1; col < W - radx; col++) { - temp[row * W + col] = tempval = tempval + ((float)(src[row][col + radx] - src[row][col - radx - 1])) / len; - } - - for (int col = W - radx; col < W; col++) { - temp[row * W + col] = tempval = (tempval * len - src[row][col - radx - 1]) / (len - 1); - len --; - } - } - } - - if (rady == 0) { -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - dst[row][col] = temp[row * W + col]; - } - } else { - const int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache -#ifdef __SSE2__ - vfloat leninitv = F2V( (float)(rady + 1)); - vfloat onev = F2V( 1.f ); - vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; - -#ifdef _OPENMP - #pragma omp for -#endif - - for (int col = 0; col < W - 7; col += 8) { - lenv = leninitv; - tempv = LVFU(temp[0 * W + col]); - temp1v = LVFU(temp[0 * W + col + 4]); - - for (int i = 1; i <= rady; i++) { - tempv = tempv + LVFU(temp[i * W + col]); - temp1v = temp1v + LVFU(temp[i * W + col + 4]); - } - - tempv = tempv / lenv; - temp1v = temp1v / lenv; - STVFU(dst[0][col], tempv); - STVFU(dst[0][col + 4], temp1v); - - for (int row = 1; row <= rady; row++) { - lenp1v = lenv + onev; - tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; - temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - lenv = lenp1v; - } - - rlenv = onev / lenv; - - for (int row = rady + 1; row < H - rady; row++) { - tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; - temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - } - - for (int row = H - rady; row < H; row++) { - lenm1v = lenv - onev; - tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; - temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - lenv = lenm1v; - } - } - -#else - //vertical blur -#ifdef _OPENMP - #pragma omp for -#endif - - for (int col = 0; col < W - numCols + 1; col += 8) { - float len = rady + 1; - - for(int k = 0; k < numCols; k++) { - dst[0][col + k] = temp[0 * W + col + k]; - } - - for (int i = 1; i <= rady; i++) { - for(int k = 0; k < numCols; k++) { - dst[0][col + k] += temp[i * W + col + k]; - } - } - - for(int k = 0; k < numCols; k++) { - dst[0][col + k] /= len; - } - - for (int row = 1; row <= rady; row++) { - for(int k = 0; k < numCols; k++) { - dst[row][col + k] = (dst[(row - 1)][col + k] * len + temp[(row + rady) * W + col + k]) / (len + 1); - } - - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - for(int k = 0; k < numCols; k++) { - dst[row][col + k] = dst[(row - 1)][col + k] + (temp[(row + rady) * W + col + k] - temp[(row - rady - 1) * W + col + k]) / len; - } - } - - for (int row = H - rady; row < H; row++) { - for(int k = 0; k < numCols; k++) { - dst[row][col + k] = (dst[(row - 1)][col + k] * len - temp[(row - rady - 1) * W + col + k]) / (len - 1); - } - - len --; - } - } - -#endif -#ifdef _OPENMP - #pragma omp single -#endif - - for (int col = W - (W % numCols); col < W; col++) { - float len = rady + 1; - dst[0][col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0][col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row][col] = dst[(row - 1)][col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - } - -} - -inline void boxblur (float** src, float** dst, int radius, int W, int H, bool multiThread) -{ - //box blur using rowbuffers and linebuffers instead of a full size buffer - - if (radius == 0) { - if (src != dst) { -#ifdef _OPENMP - #pragma omp parallel for if (multiThread) -#endif - - for (int row = 0; row < H; row++) { - for (int col = 0; col < W; col++) { - dst[row][col] = src[row][col]; - } - } - } - return; - } - - constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache -#ifdef _OPENMP - #pragma omp parallel if (multiThread) -#endif - { - std::unique_ptr buffer(new float[numCols * (radius + 1)]); - - //horizontal blur - float* const lineBuffer = buffer.get(); -#ifdef _OPENMP - #pragma omp for -#endif - for (int row = 0; row < H; row++) { - float len = radius + 1; - float tempval = src[row][0]; - lineBuffer[0] = tempval; - for (int j = 1; j <= radius; j++) { - tempval += src[row][j]; - } - - tempval /= len; - dst[row][0] = tempval; - - for (int col = 1; col <= radius; col++) { - lineBuffer[col] = src[row][col]; - tempval = (tempval * len + src[row][col + radius]) / (len + 1); - dst[row][col] = tempval; - ++len; - } - int pos = 0; - for (int col = radius + 1; col < W - radius; col++) { - const float oldVal = lineBuffer[pos]; - lineBuffer[pos] = src[row][col]; - tempval = tempval + (src[row][col + radius] - oldVal) / len; - dst[row][col] = tempval; - ++pos; - pos = pos <= radius ? pos : 0; - } - - for (int col = W - radius; col < W; col++) { - tempval = (tempval * len - lineBuffer[pos]) / (len - 1); - dst[row][col] = tempval; - --len; - ++pos; - pos = pos <= radius ? pos : 0; - } - } - - //vertical blur -#ifdef __SSE2__ - vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get(); - const vfloat leninitv = F2V(radius + 1); - const vfloat onev = F2V(1.f); - vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; - -#ifdef _OPENMP - #pragma omp for nowait -#endif - - for (int col = 0; col < W - 7; col += 8) { - lenv = leninitv; - tempv = LVFU(dst[0][col]); - temp1v = LVFU(dst[0][col + 4]); - rowBuffer[0][0] = tempv; - rowBuffer[0][1] = temp1v; - - for (int i = 1; i <= radius; i++) { - tempv = tempv + LVFU(dst[i][col]); - temp1v = temp1v + LVFU(dst[i][col + 4]); - } - - tempv = tempv / lenv; - temp1v = temp1v / lenv; - STVFU(dst[0][col], tempv); - STVFU(dst[0][col + 4], temp1v); - - for (int row = 1; row <= radius; row++) { - rowBuffer[row][0] = LVFU(dst[row][col]); - rowBuffer[row][1] = LVFU(dst[row][col + 4]); - lenp1v = lenv + onev; - tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v; - temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - lenv = lenp1v; - } - - rlenv = onev / lenv; - int pos = 0; - for (int row = radius + 1; row < H - radius; row++) { - vfloat oldVal0 = rowBuffer[pos][0]; - vfloat oldVal1 = rowBuffer[pos][1]; - rowBuffer[pos][0] = LVFU(dst[row][col]); - rowBuffer[pos][1] = LVFU(dst[row][col + 4]); - tempv = tempv + (LVFU(dst[row + radius][col]) - oldVal0) * rlenv ; - temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - oldVal1) * rlenv ; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - ++pos; - pos = pos <= radius ? pos : 0; - } - - for (int row = H - radius; row < H; row++) { - lenm1v = lenv - onev; - tempv = (tempv * lenv - rowBuffer[pos][0]) / lenm1v; - temp1v = (temp1v * lenv - rowBuffer[pos][1]) / lenm1v; - STVFU(dst[row][col], tempv); - STVFU(dst[row][col + 4], temp1v); - lenv = lenm1v; - ++pos; - pos = pos <= radius ? pos : 0; - } - } - -#else - float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); -#ifdef _OPENMP - #pragma omp for nowait -#endif - - for (int col = 0; col < W - numCols + 1; col += 8) { - float len = radius + 1; - - for (int k = 0; k < numCols; k++) { - rowBuffer[0][k] = dst[0][col + k]; - } - - for (int i = 1; i <= radius; i++) { - for (int k = 0; k < numCols; k++) { - dst[0][col + k] += dst[i][col + k]; - } - } - - for(int k = 0; k < numCols; k++) { - dst[0][col + k] /= len; - } - - for (int row = 1; row <= radius; row++) { - for(int k = 0; k < numCols; k++) { - rowBuffer[row][k] = dst[row][col + k]; - dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); - } - - len ++; - } - - int pos = 0; - for (int row = radius + 1; row < H - radius; row++) { - for(int k = 0; k < numCols; k++) { - float oldVal = rowBuffer[pos][k]; - rowBuffer[pos][k] = dst[row][col + k]; - dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) / len; - } - ++pos; - pos = pos <= radius ? pos : 0; - } - - for (int row = H - radius; row < H; row++) { - for(int k = 0; k < numCols; k++) { - dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1); - } - len --; - ++pos; - pos = pos <= radius ? pos : 0; - } - } - -#endif - //vertical blur, remaining columns -#ifdef _OPENMP - #pragma omp single -#endif - { - const int remaining = W % numCols; - - if (remaining > 0) { - float (* const rowBuffer)[8] = (float(*)[8]) buffer.get(); - const int col = W - remaining; - - float len = radius + 1; - for(int k = 0; k < remaining; ++k) { - rowBuffer[0][k] = dst[0][col + k]; - } - for (int row = 1; row <= radius; ++row) { - for(int k = 0; k < remaining; ++k) { - dst[0][col + k] += dst[row][col + k]; - } - } - for(int k = 0; k < remaining; ++k) { - dst[0][col + k] /= len; - } - for (int row = 1; row <= radius; ++row) { - for(int k = 0; k < remaining; ++k) { - rowBuffer[row][k] = dst[row][col + k]; - dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1); - } - len ++; - } - const float rlen = 1.f / len; - int pos = 0; - for (int row = radius + 1; row < H - radius; ++row) { - for(int k = 0; k < remaining; ++k) { - float oldVal = rowBuffer[pos][k]; - rowBuffer[pos][k] = dst[row][col + k]; - dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen; - } - ++pos; - pos = pos <= radius ? pos : 0; - } - for (int row = H - radius; row < H; ++row) { - for(int k = 0; k < remaining; ++k) { - dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[pos][k]) / (len - 1); - } - len --; - ++pos; - pos = pos <= radius ? pos : 0; - } - } - } - } -} - -template void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H) -{ - //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - - float* temp = buffer; - - if (radx == 0) { - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - temp[row * W + col] = src[row * W + col]; - } - } else { - //horizontal blur - for (int row = H - 1; row >= 0; row--) { - int len = radx + 1; - float tempval = (float)src[row * W]; - - for (int j = 1; j <= radx; j++) { - tempval += (float)src[row * W + j]; - } - - tempval = tempval / len; - temp[row * W] = tempval; - - for (int col = 1; col <= radx; col++) { - tempval = (tempval * len + src[row * W + col + radx]) / (len + 1); - temp[row * W + col] = tempval; - len ++; - } - - float reclen = 1.f / len; - - for (int col = radx + 1; col < W - radx; col++) { - tempval = tempval + ((float)(src[row * W + col + radx] - src[row * W + col - radx - 1])) * reclen; - temp[row * W + col] = tempval; - } - - for (int col = W - radx; col < W; col++) { - tempval = (tempval * len - src[row * W + col - radx - 1]) / (len - 1); - temp[row * W + col] = tempval; - len --; - } - } - } - - if (rady == 0) { - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - dst[row * W + col] = temp[row * W + col]; - } - } else { - //vertical blur -#ifdef __SSE2__ - vfloat leninitv = F2V( (float)(rady + 1)); - vfloat onev = F2V( 1.f ); - vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv; - int col; - - for (col = 0; col < W - 7; col += 8) { - lenv = leninitv; - tempv = LVFU(temp[0 * W + col]); - temp1v = LVFU(temp[0 * W + col + 4]); - - for (int i = 1; i <= rady; i++) { - tempv = tempv + LVFU(temp[i * W + col]); - temp1v = temp1v + LVFU(temp[i * W + col + 4]); - } - - tempv = tempv / lenv; - temp1v = temp1v / lenv; - STVFU(dst[0 * W + col], tempv); - STVFU(dst[0 * W + col + 4], temp1v); - - for (int row = 1; row <= rady; row++) { - lenp1v = lenv + onev; - tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; - temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v; - STVFU(dst[row * W + col], tempv); - STVFU(dst[row * W + col + 4], temp1v); - lenv = lenp1v; - } - - rlenv = onev / lenv; - - for (int row = rady + 1; row < H - rady; row++) { - tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; - temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ; - STVFU(dst[row * W + col], tempv); - STVFU(dst[row * W + col + 4], temp1v); - } - - for (int row = H - rady; row < H; row++) { - lenm1v = lenv - onev; - tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; - temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v; - STVFU(dst[row * W + col], tempv); - STVFU(dst[row * W + col + 4], temp1v); - lenv = lenm1v; - } - } - - for (; col < W - 3; col += 4) { - lenv = leninitv; - tempv = LVFU(temp[0 * W + col]); - - for (int i = 1; i <= rady; i++) { - tempv = tempv + LVFU(temp[i * W + col]); - } - - tempv = tempv / lenv; - STVFU(dst[0 * W + col], tempv); - - for (int row = 1; row <= rady; row++) { - lenp1v = lenv + onev; - tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v; - STVFU(dst[row * W + col], tempv); - lenv = lenp1v; - } - - rlenv = onev / lenv; - - for (int row = rady + 1; row < H - rady; row++) { - tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ; - STVFU(dst[row * W + col], tempv); - } - - for (int row = H - rady; row < H; row++) { - lenm1v = lenv - onev; - tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v; - STVFU(dst[row * W + col], tempv); - lenv = lenm1v; - } - } - - for (; col < W; col++) { - int len = rady + 1; - dst[0 * W + col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0 * W + col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - -#else - - for (int col = 0; col < W; col++) { - int len = rady + 1; - dst[0 * W + col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0 * W + col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - -#endif - } - -} - -template void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp) -{ - - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - - if (radx == 0) { - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - temp[row * W + col] = fabs(src[row * W + col]); - } - } else { - //horizontal blur - for (int row = 0; row < H; row++) { - int len = radx + 1; - float tempval = fabsf((float)src[row * W + 0]); - - for (int j = 1; j <= radx; j++) { - tempval += fabsf((float)src[row * W + j]); - } - - tempval /= len; - temp[row * W + 0] = tempval; - - for (int col = 1; col <= radx; col++) { - tempval = (tempval * len + fabsf(src[row * W + col + radx])) / (len + 1); - temp[row * W + col] = tempval; - len ++; - } - - float rlen = 1.f / (float)len; - - for (int col = radx + 1; col < W - radx; col++) { - tempval = tempval + ((float)(fabsf(src[row * W + col + radx]) - fabsf(src[row * W + col - radx - 1]))) * rlen; - temp[row * W + col] = tempval; - } - - for (int col = W - radx; col < W; col++) { - tempval = (tempval * len - fabsf(src[row * W + col - radx - 1])) / (len - 1); - temp[row * W + col] = tempval; - len --; - } - } - } - - if (rady == 0) { - for (int row = 0; row < H; row++) - for (int col = 0; col < W; col++) { - dst[row * W + col] = temp[row * W + col]; - } - } else { - //vertical blur -#ifdef __SSE2__ - vfloat leninitv = F2V( (float)(rady + 1)); - vfloat onev = F2V( 1.f ); - vfloat tempv, lenv, lenp1v, lenm1v, rlenv; - - for (int col = 0; col < W - 3; col += 4) { - lenv = leninitv; - tempv = LVF(temp[0 * W + col]); - - for (int i = 1; i <= rady; i++) { - tempv = tempv + LVF(temp[i * W + col]); - } - - tempv = tempv / lenv; - STVF(dst[0 * W + col], tempv); - - for (int row = 1; row <= rady; row++) { - lenp1v = lenv + onev; - tempv = (tempv * lenv + LVF(temp[(row + rady) * W + col])) / lenp1v; - STVF(dst[row * W + col], tempv); - lenv = lenp1v; - } - - rlenv = onev / lenv; - - for (int row = rady + 1; row < H - rady; row++) { - tempv = tempv + (LVF(temp[(row + rady) * W + col]) - LVF(temp[(row - rady - 1) * W + col])) * rlenv; - STVF(dst[row * W + col], tempv); - } - - for (int row = H - rady; row < H; row++) { - lenm1v = lenv - onev; - tempv = (tempv * lenv - LVF(temp[(row - rady - 1) * W + col])) / lenm1v; - STVF(dst[row * W + col], tempv); - lenv = lenm1v; - } - } - - for (int col = W - (W % 4); col < W; col++) { - int len = rady + 1; - dst[0 * W + col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0 * W + col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - -#else - - for (int col = 0; col < W; col++) { - int len = rady + 1; - dst[0 * W + col] = temp[0 * W + col] / len; - - for (int i = 1; i <= rady; i++) { - dst[0 * W + col] += temp[i * W + col] / len; - } - - for (int row = 1; row <= rady; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1); - len ++; - } - - for (int row = rady + 1; row < H - rady; row++) { - dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; - } - - for (int row = H - rady; row < H; row++) { - dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); - len --; - } - } - -#endif - } - -} - -} -#endif /* _BOXBLUR_H_ */ diff --git a/rtengine/gauss.cc b/rtengine/gauss.cc index ca330f9b9..dad1c4954 100644 --- a/rtengine/gauss.cc +++ b/rtengine/gauss.cc @@ -1349,7 +1349,7 @@ template void gaussVerticalmult (T** src, T** dst, const int W, const i } #endif -template void gaussianBlurImpl(T** src, T** dst, const int W, const int H, const double sigma, T *buffer = nullptr, eGaussType gausstype = GAUSS_STANDARD, T** buffer2 = nullptr) +template void gaussianBlurImpl(T** src, T** dst, const int W, const int H, const double sigma, bool useBoxBlur, eGaussType gausstype = GAUSS_STANDARD, T** buffer2 = nullptr) { static constexpr auto GAUSS_SKIP = 0.25; static constexpr auto GAUSS_3X3_LIMIT = 0.6; @@ -1357,7 +1357,7 @@ template void gaussianBlurImpl(T** src, T** dst, const int W, const int static constexpr auto GAUSS_7X7_LIMIT = 1.15; static constexpr auto GAUSS_DOUBLE = 25.0; - if(buffer) { + if (useBoxBlur) { // special variant for very large sigma, currently only used by retinex algorithm // use iterated boxblur to approximate gaussian blur // Compute ideal averaging filter width and number of iterations @@ -1393,10 +1393,10 @@ template void gaussianBlurImpl(T** src, T** dst, const int W, const int sizes[i] = ((i < m ? wl : wu) - 1) / 2; } - rtengine::boxblur(src, dst, buffer, sizes[0], sizes[0], W, H); + rtengine::boxblur(src, dst, sizes[0], W, H, true); for(int i = 1; i < n; i++) { - rtengine::boxblur(dst, dst, buffer, sizes[i], sizes[i], W, H); + rtengine::boxblur(dst, dst, sizes[i], W, H, true); } } else { if (sigma < GAUSS_SKIP) { @@ -1532,8 +1532,8 @@ template void gaussianBlurImpl(T** src, T** dst, const int W, const int } } -void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, float *buffer, eGaussType gausstype, float** buffer2) +void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, bool useBoxBlur, eGaussType gausstype, float** buffer2) { - gaussianBlurImpl(src, dst, W, H, sigma, buffer, gausstype, buffer2); + gaussianBlurImpl(src, dst, W, H, sigma, useBoxBlur, gausstype, buffer2); } diff --git a/rtengine/gauss.h b/rtengine/gauss.h index b63301d2b..f78762df3 100644 --- a/rtengine/gauss.h +++ b/rtengine/gauss.h @@ -21,6 +21,6 @@ enum eGaussType {GAUSS_STANDARD, GAUSS_MULT, GAUSS_DIV}; -void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, float *buffer = nullptr, eGaussType gausstype = GAUSS_STANDARD, float** buffer2 = nullptr); +void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, bool useBoxBlur = false, eGaussType gausstype = GAUSS_STANDARD, float** buffer2 = nullptr); #endif \ No newline at end of file diff --git a/rtengine/guidedfilter.cc b/rtengine/guidedfilter.cc index 159e89504..feb108198 100644 --- a/rtengine/guidedfilter.cc +++ b/rtengine/guidedfilter.cc @@ -105,7 +105,7 @@ void guidedFilter(const array2D &guide, const array2D &src, array2 [multithread](array2D &d, array2D &s, int rad) -> void { rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1); - boxblur(s, d, rad, s.width(), s.height(), multithread); + boxblur(static_cast(s), static_cast(d), rad, s.width(), s.height(), multithread); }; const int W = src.width(); diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 84da1cacc..f88108422 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -198,20 +198,20 @@ public: void Median_Denoise(float **src, float **dst, float upperBound, int width, int height, Median medianType, int iterations, int numThreads, float **buffer = nullptr); void RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi); void RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, const bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope); - void RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &minredaut, float & minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread = false); - void RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer); //for DCT + void RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &minredaut, float & minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread = false); + void RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail); //for DCT void RGBoutput_tile_row(float *bloxrow_L, float ** Ldetail, float ** tilemask_out, int height, int width, int top); - bool WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge); - bool WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb); - void WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float & minblueaut, int schoice, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, + bool WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge); + bool WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb); + void WaveletDenoiseAll_info(int levwav, const wavelet_decomposition &WaveletCoeffs_a, + const wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float & minblueaut, int schoice, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb); - bool WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]); - bool WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, + bool WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]); + bool WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb); - void ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge); - void ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, + void ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge); + void ShrinkAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, float * madL, float * madaab = nullptr, bool madCalculated = false); void ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, int W_ab, int H_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, diff --git a/rtengine/ipretinex.cc b/rtengine/ipretinex.cc index 7117c9f2a..de6f4960a 100644 --- a/rtengine/ipretinex.cc +++ b/rtengine/ipretinex.cc @@ -43,6 +43,7 @@ #include "gauss.h" #include "improcfun.h" +#include "jaggedarray.h" #include "median.h" #include "opthelper.h" #include "procparams.h" @@ -50,8 +51,6 @@ #include "rtengine.h" #include "StopWatch.h" -#define clipretinex( val, minv, maxv ) (( val = (val < minv ? minv : val ) ) > maxv ? maxv : val ) - namespace { void retinex_scales( float* scales, int nscales, int mode, int s, float high) @@ -138,360 +137,422 @@ namespace rtengine extern const Settings* settings; -void RawImageSource::MSR(float** luminance, float** originalLuminance, float **exLuminance, LUTf & mapcurve, bool &mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax) +void RawImageSource::MSR(float** luminance, float** originalLuminance, float **exLuminance, const LUTf& mapcurve, bool mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax) { +BENCHFUN + if (!deh.enabled) { + return; + } - if (deh.enabled) {//enabled - float maxtr, mintr; - constexpr float eps = 2.f; - bool useHsl = deh.retinexcolorspace == "HSLLOG"; - bool useHslLin = deh.retinexcolorspace == "HSLLIN"; - float offse = (float) deh.offs; //def = 0 not use - int iter = deh.iter; - int gradient = deh.scal; - int scal = 3;//disabled scal - int nei = (int) (2.8f * deh.neigh); //def = 220 - float vart = (float)deh.vart / 100.f;//variance - float gradvart = (float)deh.grad; - float gradstr = (float)deh.grads; - float strength = (float) deh.str / 100.f; // Blend with original L channel data - float limD = (float) deh.limd; - limD = pow(limD, 1.7f);//about 2500 enough - limD *= useHslLin ? 10.f : 1.f; - float ilimD = 1.f / limD; - float hig = ((float) deh.highl) / 100.f; - scal = deh.skal; + constexpr float eps = 2.f; + const bool useHsl = deh.retinexcolorspace == "HSLLOG"; + const bool useHslLin = deh.retinexcolorspace == "HSLLIN"; + const float offse = deh.offs; //def = 0 not use + const int iter = deh.iter; + const int gradient = deh.scal; + int scal = deh.skal; + const int nei = 2.8f * deh.neigh; //def = 220 + const float vart = deh.vart / 100.f;//variance + const float gradvart = deh.grad; + const float gradstr = deh.grads; + const float strength = deh.str / 100.f; // Blend with original L channel data + float limD = deh.limd; + limD = pow(limD, 1.7f);//about 2500 enough + limD *= useHslLin ? 10.f : 1.f; + const float ilimD = 1.f / limD; + const float hig = deh.highl / 100.f; - int H_L = height; - int W_L = width; + const int H_L = height; + const int W_L = width; - float *tran[H_L] ALIGNED16; - float *tranBuffer = nullptr; + constexpr float elogt = 2.71828f; + bool lhutili = false; - constexpr float elogt = 2.71828f; - bool lhutili = false; + FlatCurve* shcurve = new FlatCurve(deh.lhcurve); //curve L=f(H) - FlatCurve* shcurve = new FlatCurve(deh.lhcurve); //curve L=f(H) - - if (!shcurve || shcurve->isIdentity()) { - if (shcurve) { - delete shcurve; - shcurve = nullptr; - } - } else { - lhutili = true; + if (!shcurve || shcurve->isIdentity()) { + if (shcurve) { + delete shcurve; + shcurve = nullptr; } + } else { + lhutili = true; + } - bool higplus = false ; - int moderetinex = 2; // default to 2 ( deh.retinexMethod == "high" ) + bool higplus = false ; + int moderetinex = 2; // default to 2 ( deh.retinexMethod == "high" ) - if(deh.retinexMethod == "highliplus") { - higplus = true; - moderetinex = 3; - } else if (deh.retinexMethod == "uni") { - moderetinex = 0; - } else if (deh.retinexMethod == "low") { - moderetinex = 1; - } else { /*if (deh.retinexMethod == "highli") */ - moderetinex = 3; - } + if(deh.retinexMethod == "highliplus") { + higplus = true; + moderetinex = 3; + } else if (deh.retinexMethod == "uni") { + moderetinex = 0; + } else if (deh.retinexMethod == "low") { + moderetinex = 1; + } else { /*if (deh.retinexMethod == "highli") */ + moderetinex = 3; + } - constexpr float aahi = 49.f / 99.f; ////reduce sensibility 50% - constexpr float bbhi = 1.f - aahi; + constexpr float aahi = 49.f / 99.f; ////reduce sensibility 50% + constexpr float bbhi = 1.f - aahi; + float high = bbhi + aahi * (float) deh.highl; - for(int it = 1; it < iter + 1; it++) { //iter nb max of iterations - float high = bbhi + aahi * (float) deh.highl; + for (int it = 1; it < iter + 1; it++) { //iter nb max of iterations + float grad = 1.f; + float sc = scal; - float grad = 1.f; - float sc = scal; - - if(gradient == 0) { - grad = 1.f; - sc = 3.f; - } else if(gradient == 1) { - grad = 0.25f * it + 0.75f; - sc = -0.5f * it + 4.5f; - } else if(gradient == 2) { - grad = 0.5f * it + 0.5f; - sc = -0.75f * it + 5.75f; - } else if(gradient == 3) { - grad = 0.666f * it + 0.333f; - sc = -0.75f * it + 5.75f; - } else if(gradient == 4) { - grad = 0.8f * it + 0.2f; - sc = -0.75f * it + 5.75f; - } else if(gradient == 5) { - if(moderetinex != 3) { - grad = 2.5f * it - 1.5f; - } else { - float aa = (11.f * high - 1.f) / 4.f; - float bb = 1.f - aa; - grad = aa * it + bb; - } - - sc = -0.75f * it + 5.75f; - } else if(gradient == 6) { - if(moderetinex != 3) { - grad = 5.f * it - 4.f; - } else { - float aa = (21.f * high - 1.f) / 4.f; - float bb = 1.f - aa; - grad = aa * it + bb; - } - - sc = -0.75f * it + 5.75f; - } - - else if(gradient == -1) { - grad = -0.125f * it + 1.125f; - sc = 3.f; - } - - if(iter == 1) { - sc = scal; + if (gradient == 0) { + grad = 1.f; + sc = 3.f; + } else if (gradient == 1) { + grad = 0.25f * it + 0.75f; + sc = -0.5f * it + 4.5f; + } else if (gradient == 2) { + grad = 0.5f * it + 0.5f; + sc = -0.75f * it + 5.75f; + } else if (gradient == 3) { + grad = 0.666f * it + 0.333f; + sc = -0.75f * it + 5.75f; + } else if (gradient == 4) { + grad = 0.8f * it + 0.2f; + sc = -0.75f * it + 5.75f; + } else if (gradient == 5) { + if (moderetinex != 3) { + grad = 2.5f * it - 1.5f; } else { - //adjust sc in function of choice of scale by user if iterations - if(scal < 3) { - sc -= 1; + float aa = (11.f * high - 1.f) / 4.f; + float bb = 1.f - aa; + grad = aa * it + bb; + } - if(sc < 1.f) {//avoid 0 - sc = 1.f; - } + sc = -0.75f * it + 5.75f; + } else if (gradient == 6) { + if (moderetinex != 3) { + grad = 5.f * it - 4.f; + } else { + float aa = (21.f * high - 1.f) / 4.f; + float bb = 1.f - aa; + grad = aa * it + bb; + } + + sc = -0.75f * it + 5.75f; + } else if (gradient == -1) { + grad = -0.125f * it + 1.125f; + sc = 3.f; + } + + if (iter == 1) { + sc = scal; + } else { + //adjust sc in function of choice of scale by user if iterations + if (scal < 3) { + sc -= 1; + if (sc < 1.f) {//avoid 0 + sc = 1.f; } + } else if (scal > 4) { + sc += 1; + } + } - if(scal > 4) { - sc += 1; + float varx = vart; + float limdx = limD; + float ilimdx = ilimD; + + if (gradvart != 0) { + if (gradvart == 1) { + varx = vart * (-0.125f * it + 1.125f); + limdx = limD * (-0.125f * it + 1.125f); + ilimdx = 1.f / limdx; + } else if (gradvart == 2) { + varx = vart * (-0.2f * it + 1.2f); + limdx = limD * (-0.2f * it + 1.2f); + ilimdx = 1.f / limdx; + } else if (gradvart == -1) { + varx = vart * (0.125f * it + 0.875f); + limdx = limD * (0.125f * it + 0.875f); + ilimdx = 1.f / limdx; + } else if (gradvart == -2) { + varx = vart * (0.4f * it + 0.6f); + limdx = limD * (0.4f * it + 0.6f); + ilimdx = 1.f / limdx; + } + } + + scal = round(sc); + float ks = 1.f; + + if (gradstr != 0) { + if (gradstr == 1) { + if (it <= 3) { + ks = -0.3f * it + 1.6f; + } else { + ks = 0.5f; + } + } else if (gradstr == 2) { + if (it <= 3) { + ks = -0.6f * it + 2.2f; + } else { + ks = 0.3f; + } + } else if (gradstr == -1) { + if (it <= 3) { + ks = 0.2f * it + 0.6f; + } else { + ks = 1.2f; + } + } else if (gradstr == -2) { + if (it <= 3) { + ks = 0.4f * it + 0.2f; + } else { + ks = 1.5f; } } + } - float varx = vart; - float limdx = limD; - float ilimdx = ilimD; + const float strengthx = ks * strength; - if(gradvart != 0) { - if(gradvart == 1) { - varx = vart * (-0.125f * it + 1.125f); - limdx = limD * (-0.125f * it + 1.125f); - ilimdx = 1.f / limdx; - } else if(gradvart == 2) { - varx = vart * (-0.2f * it + 1.2f); - limdx = limD * (-0.2f * it + 1.2f); - ilimdx = 1.f / limdx; - } else if(gradvart == -1) { - varx = vart * (0.125f * it + 0.875f); - limdx = limD * (0.125f * it + 0.875f); - ilimdx = 1.f / limdx; - } else if(gradvart == -2) { - varx = vart * (0.4f * it + 0.6f); - limdx = limD * (0.4f * it + 0.6f); - ilimdx = 1.f / limdx; - } - } + constexpr auto maxRetinexScales = 8; + float RetinexScales[maxRetinexScales]; - scal = round(sc); - float ks = 1.f; + retinex_scales(RetinexScales, scal, moderetinex, nei / grad, high); - if(gradstr != 0) { - if(gradstr == 1) { - if(it <= 3) { - ks = -0.3f * it + 1.6f; - } else { - ks = 0.5f; - } - } else if(gradstr == 2) { - if(it <= 3) { - ks = -0.6f * it + 2.2f; - } else { - ks = 0.3f; - } - } else if(gradstr == -1) { - if(it <= 3) { - ks = 0.2f * it + 0.6f; - } else { - ks = 1.2f; - } - } else if(gradstr == -2) { - if(it <= 3) { - ks = 0.4f * it + 0.2f; - } else { - ks = 1.5f; - } - } - } + const int shHighlights = deh.highlights; + const int shShadows = deh.shadows; - float strengthx = ks * strength; + int mapmet = 0; - constexpr auto maxRetinexScales = 8; - float RetinexScales[maxRetinexScales]; + if(deh.mapMethod == "map") { + mapmet = 2; + } else if(deh.mapMethod == "mapT") { + mapmet = 3; + } else if(deh.mapMethod == "gaus") { + mapmet = 4; + } - retinex_scales( RetinexScales, scal, moderetinex, nei / grad, high ); + const double shradius = mapmet == 4 ? (double) deh.radius : 40.; - float *src[H_L] ALIGNED16; - float *srcBuffer = new float[H_L * W_L]; + int viewmet = 0; - for (int i = 0; i < H_L; i++) { - src[i] = &srcBuffer[i * W_L]; - } + if(deh.viewMethod == "mask") { + viewmet = 1; + } else if(deh.viewMethod == "tran") { + viewmet = 2; + } else if(deh.viewMethod == "tran2") { + viewmet = 3; + } else if(deh.viewMethod == "unsharp") { + viewmet = 4; + } - int h_th = 0, s_th = 0; - - int shHighlights = deh.highlights; - int shShadows = deh.shadows; - - int mapmet = 0; - - if(deh.mapMethod == "map") { - mapmet = 2; - } else if(deh.mapMethod == "mapT") { - mapmet = 3; - } else if(deh.mapMethod == "gaus") { - mapmet = 4; - } - - const double shradius = mapmet == 4 ? (double) deh.radius : 40.; - - int viewmet = 0; - - if(deh.viewMethod == "mask") { - viewmet = 1; - } else if(deh.viewMethod == "tran") { - viewmet = 2; - } else if(deh.viewMethod == "tran2") { - viewmet = 3; - } else if(deh.viewMethod == "unsharp") { - viewmet = 4; - } + std::unique_ptr> srcBuffer(new JaggedArray(W_L, H_L)); + float** src = *(srcBuffer.get()); #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel for #endif - for (int i = 0; i < H_L; i++) - for (int j = 0; j < W_L; j++) { - src[i][j] = luminance[i][j] + eps; - luminance[i][j] = 0.f; - } - - float *out[H_L] ALIGNED16; - float *outBuffer = new float[H_L * W_L]; - - for (int i = 0; i < H_L; i++) { - out[i] = &outBuffer[i * W_L]; + for (int i = 0; i < H_L; i++) + for (int j = 0; j < W_L; j++) { + src[i][j] = luminance[i][j] + eps; + luminance[i][j] = 0.f; } - if(viewmet == 3 || viewmet == 2) { - tranBuffer = new float[H_L * W_L]; + JaggedArray out(W_L, H_L); + JaggedArray& tran = out; // tran and out can safely use the same buffer - for (int i = 0; i < H_L; i++) { - tran[i] = &tranBuffer[i * W_L]; - } - } + const float logBetaGain = xlogf(16384.f); + float pond = logBetaGain / (float) scal; - const float logBetaGain = xlogf(16384.f); - float pond = logBetaGain / (float) scal; + if(!useHslLin) { + pond /= log(elogt); + } - if(!useHslLin) { - pond /= log(elogt); - } + std::unique_ptr shmap; + if (((mapmet == 2 || mapmet == 3 || mapmet == 4) && it == 1)) { + shmap.reset(new SHMap(W_L, H_L)); + } - auto shmap = ((mapmet == 2 || mapmet == 3 || mapmet == 4) && it == 1) ? new SHMap (W_L, H_L) : nullptr; + std::unique_ptr buffer; + if (mapmet > 0) { + buffer.reset(new float[W_L * H_L]); + } - float *buffer = new float[W_L * H_L];; - - for ( int scale = scal - 1; scale >= 0; scale-- ) { -#ifdef _OPENMP - #pragma omp parallel -#endif - { - if(scale == scal - 1) - { - gaussianBlur (src, out, W_L, H_L, RetinexScales[scale], buffer); - } else { // reuse result of last iteration - // out was modified in last iteration => restore it - if((((mapmet == 2 && scale > 1) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1) - { -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < H_L; i++) { - for (int j = 0; j < W_L; j++) { - out[i][j] = buffer[i * W_L + j]; - } - } - } - - gaussianBlur (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer); - } - if((((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1 && scale > 0) - { - // out will be modified => store it for use in next iteration. We even don't need a new buffer because 'buffer' is free after gaussianBlur :) -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < H_L; i++) { - for (int j = 0; j < W_L; j++) { - buffer[i * W_L + j] = out[i][j]; - } - } - } - } - - if(((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) { - shmap->updateL (out, shradius, true, 1); - h_th = shmap->max_f - deh.htonalwidth * (shmap->max_f - shmap->avg) / 100; - s_th = deh.stonalwidth * (shmap->avg - shmap->min_f) / 100; - } - -#ifdef __SSE2__ - vfloat pondv = F2V(pond); - vfloat limMinv = F2V(ilimdx); - vfloat limMaxv = F2V(limdx); - -#endif - - if(mapmet > 0 && mapcontlutili && it == 1) { - // TODO: When rgbcurvespeedup branch is merged into master we can simplify the code by - // 1) in rawimagesource.retinexPrepareCurves() insert - // mapcurve *= 0.5f; - // after - // CurveFactory::mapcurve (mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI); - // 2) remove the division by 2.f from the code 7 lines below this line + for (int scale = scal - 1; scale >= 0; --scale) { + if (scale == scal - 1) { + gaussianBlur(src, out, W_L, H_L, RetinexScales[scale], true); + } else { // reuse result of last iteration + // out was modified in last iteration => restore it + if((((mapmet == 2 && scale > 1) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1) { #ifdef _OPENMP #pragma omp parallel for #endif for (int i = 0; i < H_L; i++) { for (int j = 0; j < W_L; j++) { - out[i][j] = mapcurve[2.f * out[i][j]] / 2.f; + out[i][j] = buffer[i * W_L + j]; } } - } - if(((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) { - float hWeight = (100.f - shHighlights) / 100.f; - float sWeight = (100.f - shShadows) / 100.f; + gaussianBlur(out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), true); + } + + if ((((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1 && scale > 0) { + // out will be modified => store it for use in next iteration. #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) + #pragma omp parallel for #endif - for (int i = 0; i < H_L; i++) { - for (int j = 0; j < W_L; j++) { - float mapval = 1.f + shmap->map[i][j]; - float factor = 1.f; + for (int i = 0; i < H_L; i++) { + for (int j = 0; j < W_L; j++) { + buffer[i * W_L + j] = out[i][j]; + } + } + } + int h_th = 0; + int s_th = 0; + if (((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) { + shmap->updateL(out, shradius, true, 1); + h_th = shmap->max_f - deh.htonalwidth * (shmap->max_f - shmap->avg) / 100; + s_th = deh.stonalwidth * (shmap->avg - shmap->min_f) / 100; + } - if (mapval > h_th) { - factor = (h_th + hWeight * (mapval - h_th)) / mapval; - } else if (mapval < s_th) { - factor = (s_th - sWeight * (s_th - mapval)) / mapval; - } + if (mapmet > 0 && mapcontlutili && it == 1) { +#ifdef _OPENMP + #pragma omp parallel for +#endif - out[i][j] *= factor; + for (int i = 0; i < H_L; i++) { + for (int j = 0; j < W_L; j++) { + out[i][j] = mapcurve[2.f * out[i][j]]; + } + } + } + + if (((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) { + const float hWeight = (100.f - shHighlights) / 100.f; + const float sWeight = (100.f - shShadows) / 100.f; +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif + + for (int i = 0; i < H_L; i++) { + for (int j = 0; j < W_L; j++) { + const float mapval = 1.f + shmap->map[i][j]; + float factor; + + if (mapval > h_th) { + factor = (h_th + hWeight * (mapval - h_th)) / mapval; + } else if (mapval < s_th) { + factor = (s_th - sWeight * (s_th - mapval)) / mapval; + } else { + factor = 1.f; } + + out[i][j] *= factor; + } + } + } + +#ifdef _OPENMP + #pragma omp parallel for +#endif + + for (int i = 0; i < H_L; i++) { + int j = 0; + +#ifdef __SSE2__ + const vfloat pondv = F2V(pond); + const vfloat limMinv = F2V(ilimdx); + const vfloat limMaxv = F2V(limdx); + if( useHslLin) { + for (; j < W_L - 3; j += 4) { + STVFU(luminance[i][j], LVFU(luminance[i][j]) + pondv * vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv)); + } + } else { + for (; j < W_L - 3; j += 4) { + STVFU(luminance[i][j], LVFU(luminance[i][j]) + pondv * xlogf(vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv))); + } + } + +#endif + + if(useHslLin) { + for (; j < W_L; j++) { + luminance[i][j] += pond * LIM(src[i][j] / out[i][j], ilimdx, limdx); + } + } else { + for (; j < W_L; j++) { + luminance[i][j] += pond * xlogf(LIM(src[i][j] / out[i][j], ilimdx, limdx)); // /logt ? + } + } + } + } + + srcBuffer.reset(); + + float mean = 0.f; + float stddv = 0.f; + // I call mean_stddv2 instead of mean_stddv ==> logBetaGain + + float maxtr, mintr; + mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr); + //printf("mean=%f std=%f delta=%f maxtr=%f mintr=%f\n", mean, stddv, delta, maxtr, mintr); + + //mean_stddv( luminance, mean, stddv, W_L, H_L, logBetaGain, maxtr, mintr); + if (dehatransmissionCurve && mean != 0.f && stddv != 0.f) { //if curve + float asig = 0.166666f / stddv; + float bsig = 0.5f - asig * mean; + float amax = 0.333333f / (maxtr - mean - stddv); + float bmax = 1.f - amax * maxtr; + float amin = 0.333333f / (mean - stddv - mintr); + float bmin = -amin * mintr; + + asig *= 500.f; + bsig *= 500.f; + amax *= 500.f; + bmax *= 500.f; + amin *= 500.f; + bmin *= 500.f; + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif + + for (int i = 0; i < H_L; i++ ) { + for (int j = 0; j < W_L; j++) { //for mintr to maxtr evalate absciss in function of original transmission + float absciss; + if (LIKELY(fabsf(luminance[i][j] - mean) < stddv)) { + absciss = asig * luminance[i][j] + bsig; + } else if (luminance[i][j] >= mean) { + absciss = amax * luminance[i][j] + bmax; + } else { /*if(luminance[i][j] <= mean - stddv)*/ + absciss = amin * luminance[i][j] + bmin; + } + + //TODO : move multiplication by 4.f and subtraction of 1.f inside the curve + luminance[i][j] *= (-1.f + 4.f * dehatransmissionCurve[absciss]); //new transmission + + if(viewmet == 3 || viewmet == 2) { + tran[i][j] = luminance[i][j]; + } + } + } + + // median filter on transmission ==> reduce artifacts + if (deh.medianmap && it == 1) { //only one time + JaggedArray tmL(W_L, H_L); + constexpr int borderL = 1; + +#ifdef _OPENMP + #pragma omp parallel for +#endif + + for (int i = borderL; i < H_L - borderL; i++) { + for (int j = borderL; j < W_L - borderL; j++) { + tmL[i][j] = median(luminance[i][j], luminance[i - 1][j], luminance[i + 1][j], luminance[i][j + 1], luminance[i][j - 1], luminance[i - 1][j - 1], luminance[i - 1][j + 1], luminance[i + 1][j - 1], luminance[i + 1][j + 1]); //3x3 } } @@ -499,307 +560,164 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e #pragma omp parallel for #endif - for (int i = 0; i < H_L; i++) { - int j = 0; - -#ifdef __SSE2__ - - if(useHslLin) { - for (; j < W_L - 3; j += 4) { - _mm_storeu_ps(&luminance[i][j], LVFU(luminance[i][j]) + pondv * (vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv) )); - } - } else { - for (; j < W_L - 3; j += 4) { - _mm_storeu_ps(&luminance[i][j], LVFU(luminance[i][j]) + pondv * xlogf(vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv) )); - } - } - -#endif - - if(useHslLin) { - for (; j < W_L; j++) { - luminance[i][j] += pond * (LIM(src[i][j] / out[i][j], ilimdx, limdx)); - } - } else { - for (; j < W_L; j++) { - luminance[i][j] += pond * xlogf(LIM(src[i][j] / out[i][j], ilimdx, limdx)); // /logt ? - } + for (int i = borderL; i < H_L - borderL; i++ ) { + for (int j = borderL; j < W_L - borderL; j++) { + luminance[i][j] = tmL[i][j]; } } } - if(mapmet > 1) { - if(shmap) { - delete shmap; - } - } - - shmap = nullptr; - - delete [] buffer; - delete [] srcBuffer; - - float mean = 0.f; - float stddv = 0.f; // I call mean_stddv2 instead of mean_stddv ==> logBetaGain + //mean_stddv( luminance, mean, stddv, W_L, H_L, 1.f, maxtr, mintr); + mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr); + } - mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr); - //printf("mean=%f std=%f delta=%f maxtr=%f mintr=%f\n", mean, stddv, delta, maxtr, mintr); + constexpr float epsil = 0.1f; - //mean_stddv( luminance, mean, stddv, W_L, H_L, logBetaGain, maxtr, mintr); - if (dehatransmissionCurve && mean != 0.f && stddv != 0.f) { //if curve - float asig = 0.166666f / stddv; - float bsig = 0.5f - asig * mean; - float amax = 0.333333f / (maxtr - mean - stddv); - float bmax = 1.f - amax * maxtr; - float amin = 0.333333f / (mean - stddv - mintr); - float bmin = -amin * mintr; + mini = mean - varx * stddv; - asig *= 500.f; - bsig *= 500.f; - amax *= 500.f; - bmax *= 500.f; - amin *= 500.f; - bmin *= 500.f; + if (mini < mintr) { + mini = mintr + epsil; + } -#ifdef _OPENMP - #pragma omp parallel -#endif - { - float absciss; -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif + maxi = mean + varx * stddv; - for (int i = 0; i < H_L; i++ ) - for (int j = 0; j < W_L; j++) { //for mintr to maxtr evalate absciss in function of original transmission - if (LIKELY(fabsf(luminance[i][j] - mean) < stddv)) { - absciss = asig * luminance[i][j] + bsig; - } else if (luminance[i][j] >= mean) { - absciss = amax * luminance[i][j] + bmax; - } else { /*if(luminance[i][j] <= mean - stddv)*/ - absciss = amin * luminance[i][j] + bmin; - } + if (maxi > maxtr) { + maxi = maxtr - epsil; + } - //TODO : move multiplication by 4.f and subtraction of 1.f inside the curve - luminance[i][j] *= (-1.f + 4.f * dehatransmissionCurve[absciss]); //new transmission + float delta = maxi - mini; + //printf("maxi=%f mini=%f mean=%f std=%f delta=%f maxtr=%f mintr=%f\n", maxi, mini, mean, stddv, delta, maxtr, mintr); - if(viewmet == 3 || viewmet == 2) { - tran[i][j] = luminance[i][j]; - } - } - } + if ( !delta ) { + delta = 1.0f; + } - // median filter on transmission ==> reduce artifacts - if (deh.medianmap && it == 1) { //only one time - int wid = W_L; - int hei = H_L; - float *tmL[hei] ALIGNED16; - float *tmLBuffer = new float[wid * hei]; - int borderL = 1; - - for (int i = 0; i < hei; i++) { - tmL[i] = &tmLBuffer[i * wid]; - } - -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int i = borderL; i < hei - borderL; i++) { - for (int j = borderL; j < wid - borderL; j++) { - tmL[i][j] = median(luminance[i][j], luminance[i - 1][j], luminance[i + 1][j], luminance[i][j + 1], luminance[i][j - 1], luminance[i - 1][j - 1], luminance[i - 1][j + 1], luminance[i + 1][j - 1], luminance[i + 1][j + 1]); //3x3 - } - } - -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int i = borderL; i < hei - borderL; i++ ) { - for (int j = borderL; j < wid - borderL; j++) { - luminance[i][j] = tmL[i][j]; - } - } - - delete [] tmLBuffer; - - } - - // I call mean_stddv2 instead of mean_stddv ==> logBetaGain - //mean_stddv( luminance, mean, stddv, W_L, H_L, 1.f, maxtr, mintr); - mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr); - - } - - float epsil = 0.1f; - - mini = mean - varx * stddv; - - if (mini < mintr) { - mini = mintr + epsil; - } - - maxi = mean + varx * stddv; - - if (maxi > maxtr) { - maxi = maxtr - epsil; - } - - float delta = maxi - mini; - //printf("maxi=%f mini=%f mean=%f std=%f delta=%f maxtr=%f mintr=%f\n", maxi, mini, mean, stddv, delta, maxtr, mintr); - - if ( !delta ) { - delta = 1.0f; - } - - float cdfactor = 32768.f / delta; - maxCD = -9999999.f; - minCD = 9999999.f; - // coeff for auto "transmission" with 2 sigma #95% data - float aza = 16300.f / (2.f * stddv); - float azb = -aza * (mean - 2.f * stddv); - float bza = 16300.f / (2.f * stddv); - float bzb = 16300.f - bza * (mean); + // coeff for auto "transmission" with 2 sigma #95% data + const float aza = 16300.f / (2.f * stddv); + const float azb = -aza * (mean - 2.f * stddv); + const float bza = 16300.f / (2.f * stddv); + const float bzb = 16300.f - bza * (mean); //prepare work for curve gain #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel for #endif - for (int i = 0; i < H_L; i++) { - for (int j = 0; j < W_L; j++) { - luminance[i][j] = luminance[i][j] - mini; - } + for (int i = 0; i < H_L; i++) { + for (int j = 0; j < W_L; j++) { + luminance[i][j] = luminance[i][j] - mini; } + } - mean = 0.f; - stddv = 0.f; - // I call mean_stddv2 instead of mean_stddv ==> logBetaGain + mean = 0.f; + stddv = 0.f; + // I call mean_stddv2 instead of mean_stddv ==> logBetaGain - mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr); - float asig = 0.f, bsig = 0.f, amax = 0.f, bmax = 0.f, amin = 0.f, bmin = 0.f; + mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr); + float asig = 0.f, bsig = 0.f, amax = 0.f, bmax = 0.f, amin = 0.f, bmin = 0.f; - if (dehagaintransmissionCurve && mean != 0.f && stddv != 0.f) { //if curve - asig = 0.166666f / stddv; - bsig = 0.5f - asig * mean; - amax = 0.333333f / (maxtr - mean - stddv); - bmax = 1.f - amax * maxtr; - amin = 0.333333f / (mean - stddv - mintr); - bmin = -amin * mintr; + if (dehagaintransmissionCurve && mean != 0.f && stddv != 0.f) { //if curve + asig = 0.166666f / stddv; + bsig = 0.5f - asig * mean; + amax = 0.333333f / (maxtr - mean - stddv); + bmax = 1.f - amax * maxtr; + amin = 0.333333f / (mean - stddv - mintr); + bmin = -amin * mintr; - asig *= 500.f; - bsig *= 500.f; - amax *= 500.f; - bmax *= 500.f; - amin *= 500.f; - bmin *= 500.f; - } + asig *= 500.f; + bsig *= 500.f; + amax *= 500.f; + bmax *= 500.f; + amin *= 500.f; + bmin *= 500.f; + } + + const float cdfactor = 32768.f / delta; + maxCD = -9999999.f; + minCD = 9999999.f; #ifdef _OPENMP - #pragma omp parallel -#endif - { - float cdmax = -999999.f, cdmin = 999999.f; -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) nowait + #pragma omp parallel for reduction(max:maxCD) reduction(min:minCD) schedule(dynamic, 16) #endif - for ( int i = 0; i < H_L; i ++ ) - for (int j = 0; j < W_L; j++) { - float gan; + for ( int i = 0; i < H_L; i ++ ) { + for (int j = 0; j < W_L; j++) { + float gan; - if (dehagaintransmissionCurve && mean != 0.f && stddv != 0.f) { - float absciss; - - if (LIKELY(fabsf(luminance[i][j] - mean) < stddv)) { - absciss = asig * luminance[i][j] + bsig; - } else if (luminance[i][j] >= mean) { - absciss = amax * luminance[i][j] + bmax; - } else { /*if(luminance[i][j] <= mean - stddv)*/ - absciss = amin * luminance[i][j] + bmin; - } - - - // float cd = cdfactor * ( luminance[i][j] - mini ) + offse; - // TODO : move multiplication by 2.f inside the curve - gan = 2.f * (dehagaintransmissionCurve[absciss]); //new gain function transmission - } else { - gan = 0.5f; - } - - float cd = gan * cdfactor * ( luminance[i][j] ) + offse; - - cdmax = cd > cdmax ? cd : cdmax; - cdmin = cd < cdmin ? cd : cdmin; - - float str = strengthx; - - if(lhutili && it == 1) { // S=f(H) - { - float HH = exLuminance[i][j]; - float valparam; - - if(useHsl || useHslLin) { - valparam = float((shcurve->getVal(HH) - 0.5f)); - } else { - valparam = float((shcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f)); - } - - str *= (1.f + 2.f * valparam); - } - } - - if(higplus && exLuminance[i][j] > 65535.f * hig) { - str *= hig; - } - - if(viewmet == 0) { - luminance[i][j] = intp(str, clipretinex( cd, 0.f, 32768.f ), originalLuminance[i][j]); - } else if(viewmet == 1) { - luminance[i][j] = out[i][j]; - } else if(viewmet == 4) { - luminance[i][j] = originalLuminance[i][j] + str * (originalLuminance[i][j] - out[i][j]);//unsharp - } else if(viewmet == 2) { - if(tran[i][j] <= mean) { - luminance[i][j] = azb + aza * tran[i][j]; //auto values - } else { - luminance[i][j] = bzb + bza * tran[i][j]; - } - } else { /*if(viewmet == 3) */ - luminance[i][j] = 1000.f + tran[i][j] * 700.f; //arbitrary values to help display log values which are between -20 to + 30 - usage values -4 + 5 - } + if (dehagaintransmissionCurve && mean != 0.f && stddv != 0.f) { + float absciss; + if (LIKELY(fabsf(luminance[i][j] - mean) < stddv)) { + absciss = asig * luminance[i][j] + bsig; + } else if (luminance[i][j] >= mean) { + absciss = amax * luminance[i][j] + bmax; + } else { /*if(luminance[i][j] <= mean - stddv)*/ + absciss = amin * luminance[i][j] + bmin; } -#ifdef _OPENMP - #pragma omp critical -#endif - { - maxCD = maxCD > cdmax ? maxCD : cdmax; - minCD = minCD < cdmin ? minCD : cdmin; + + // float cd = cdfactor * ( luminance[i][j] - mini ) + offse; + // TODO : move multiplication by 2.f inside the curve + gan = 2.f * dehagaintransmissionCurve[absciss]; //new gain function transmission + } else { + gan = 0.5f; + } + + const float cd = gan * cdfactor * luminance[i][j] + offse; + + maxCD = cd > maxCD ? cd : maxCD; + minCD = cd < minCD ? cd : minCD; + + float str = strengthx; + + if (lhutili && it == 1) { // S=f(H) + { + const float HH = exLuminance[i][j]; + float valparam; + + if(useHsl || useHslLin) { + valparam = shcurve->getVal(HH) - 0.5f; + } else { + valparam = shcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f; + } + + str *= (1.f + 2.f * valparam); + } + } + + if (higplus && exLuminance[i][j] > 65535.f * hig) { + str *= hig; + } + + if (viewmet == 0) { + luminance[i][j] = intp(str, LIM(cd, 0.f, 32768.f), originalLuminance[i][j]); + } else if (viewmet == 1) { + luminance[i][j] = out[i][j]; + } else if (viewmet == 4) { + luminance[i][j] = originalLuminance[i][j] + str * (originalLuminance[i][j] - out[i][j]);//unsharp + } else if (viewmet == 2) { + if(tran[i][j] <= mean) { + luminance[i][j] = azb + aza * tran[i][j]; //auto values + } else { + luminance[i][j] = bzb + bza * tran[i][j]; + } + } else { /*if (viewmet == 3) */ + luminance[i][j] = 1000.f + tran[i][j] * 700.f; //arbitrary values to help display log values which are between -20 to + 30 - usage values -4 + 5 } } - - delete [] outBuffer; - outBuffer = nullptr; - //printf("cdmin=%f cdmax=%f\n",minCD, maxCD); - Tmean = mean; - Tsigma = stddv; - Tmin = mintr; - Tmax = maxtr; - - if (shcurve) { - delete shcurve; - shcurve = nullptr; - } } - if(tranBuffer) { - delete [] tranBuffer; - } + Tmean = mean; + Tsigma = stddv; + Tmin = mintr; + Tmax = maxtr; + if (shcurve) { + delete shcurve; + shcurve = nullptr; + } } } diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index bbd1de155..0b1332ec9 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -207,13 +207,13 @@ BENCHFUN for (int k = 0; k < sharpenParam.deconviter; k++) { if (!needdamp) { // apply gaussian blur and divide luminance by result of gaussian blur - gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance); + gaussianBlur(tmpI, tmp, W, H, sigma, false, GAUSS_DIV, luminance); } else { // apply gaussian blur + damping gaussianBlur(tmpI, tmp, W, H, sigma); dcdamping(tmp, luminance, damping, W, H); } - gaussianBlur(tmp, tmpI, W, H, sigma, nullptr, GAUSS_MULT); + gaussianBlur(tmp, tmpI, W, H, sigma, false, GAUSS_MULT); } // end for #ifdef _OPENMP diff --git a/rtengine/ipwavelet.cc b/rtengine/ipwavelet.cc index 07e9da85b..6631aae32 100644 --- a/rtengine/ipwavelet.cc +++ b/rtengine/ipwavelet.cc @@ -32,7 +32,6 @@ #include "improcfun.h" #include "LUT.h" #include "array2D.h" -#include "boxblur.h" #include "rt_math.h" #include "mytime.h" #include "sleef.c" diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 5f1d13c79..ed1e45394 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -1950,8 +1950,8 @@ void RawImageSource::retinexPrepareCurves(const RetinexParams &retinexParams, LU CurveFactory::curveDehaContL (retinexcontlutili, retinexParams.cdcurve, cdcurve, 1, lhist16RETI, histLRETI); } - CurveFactory::mapcurve (mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI); - + CurveFactory::mapcurve(mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI); + mapcurve *= 0.5f; retinexParams.getCurves(retinextransmissionCurve, retinexgaintransmissionCurve); } diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index e52adea18..37d927445 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -201,7 +201,7 @@ public: } static void inverse33 (const double (*coeff)[3], double (*icoeff)[3]); - void MSR(float** luminance, float **originalLuminance, float **exLuminance, LUTf & mapcurve, bool &mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax); + void MSR(float** luminance, float **originalLuminance, float **exLuminance, const LUTf& mapcurve, bool mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax); void HLRecovery_inpaint (float** red, float** green, float** blue) override; static void HLRecovery_Luminance (float* rin, float* gin, float* bin, float* rout, float* gout, float* bout, int width, float maxval); static void HLRecovery_CIELab (float* rin, float* gin, float* bin, float* rout, float* gout, float* bout, int width, float maxval, double cam[3][3], double icam[3][3]); diff --git a/rtengine/rescale.h b/rtengine/rescale.h index e9d476fea..8e1f99271 100644 --- a/rtengine/rescale.h +++ b/rtengine/rescale.h @@ -21,6 +21,7 @@ #pragma once #include "array2D.h" +#include "rt_math.h" namespace rtengine { diff --git a/rtengine/shmap.cc b/rtengine/shmap.cc index 6b59c4b40..368442f2b 100644 --- a/rtengine/shmap.cc +++ b/rtengine/shmap.cc @@ -85,21 +85,14 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int if (!hq) { fillLuminance( img, map, lumi); - float *buffer = nullptr; - - if(radius > 40.) { - // When we pass another buffer to gaussianBlur, it will use iterated boxblur which is less prone to artifacts - buffer = new float[W * H]; - } + const bool useBoxBlur = radius > 40.0; // boxblur is less prone to artifacts for large radi #ifdef _OPENMP - #pragma omp parallel + #pragma omp parallel if (!useBoxBlur) #endif { - gaussianBlur (map, map, W, H, radius, buffer); + gaussianBlur (map, map, W, H, radius, useBoxBlur); } - - delete [] buffer; } else {