From dc1d1a561b893ed75ea43a499e7a299c5752ed4d Mon Sep 17 00:00:00 2001 From: Desmis Date: Sat, 29 Jun 2019 10:51:09 +0200 Subject: [PATCH] Optimization for FFTW Retinex and Local Contrast --- rtdata/languages/default | 3 +- rtengine/iplocallab.cc | 84 ++++++++++++++++++++++++++++++++++++---- rtengine/ipretinex.cc | 10 +---- 3 files changed, 81 insertions(+), 16 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index b656e3f8c..438f2005d 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -942,6 +942,7 @@ HISTORY_MSG_695;Local - Soft method HISTORY_MSG_696;Local - Retinex Normalize HISTORY_MSG_697;Local - TM Normalize HISTORY_MSG_698;Local - Local contrast Fast Fourier +HISTORY_MSG_699;Local - Retinex Fast Fourier HISTORY_MSG_CLAMPOOG;Clip out-of-gamut colors HISTORY_MSG_COLORTONING_LABGRID_VALUE;CT - Color correction HISTORY_MSG_COLORTONING_LABREGION_AB;CT - Color correction @@ -2030,7 +2031,7 @@ TP_LOCALLAB_LIGHTRETI;Lightness TP_LOCALLAB_THRESRETI;Threshold TP_LOCALLAB_DENOIS;Denoise TP_LOCALLAB_DEHAZ;Dehaze -TP_LOCALLAB_FFTW;Use Fast Fourier +TP_LOCALLAB_FFTW;Use Fast Fourier Transform TP_LOCALLAB_GRIDONE;Color Toning TP_LOCALLAB_GRIDTWO;Direct TP_LOCALLAB_LUM;Curves LC diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index 008669b97..4b9ee24f2 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -6363,7 +6363,7 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o -// soft light and +// soft light and retinex_pde if (lp.strng > 0.f && call <= 3 && lp.sfena) { int ystart = std::max(static_cast(lp.yc - lp.lyT) - cy, 0); int yend = std::min(static_cast(lp.yc + lp.ly) - cy, original->H); @@ -6534,8 +6534,8 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o break; } } - //printf("FTsizeH =%i FTsizeW=%i \n", ftsizeH, ftsizeW); - //optimize with size fftw + //printf("FTsizeH =%i FTsizeW=%i \n", ftsizeH, ftsizeW); + //optimize with size fftw if(ystart == 0 && yend < original->H) lp.ly -= (bfh - ftsizeH); else if (ystart != 0 && yend == original->H) lp.lyT -= (bfh - ftsizeH); else if(ystart != 0 && yend != original->H) { @@ -6557,7 +6557,7 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o bfwr = ftsizeW; reduW = true; } - //new values optimized + //new values optimized ystart = std::max(static_cast(lp.yc - lp.lyT) - cy, 0); yend = std::min(static_cast(lp.yc + lp.ly) - cy, original->H); xstart = std::max(static_cast(lp.xc - lp.lxL) - cx, 0); @@ -6600,10 +6600,35 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o localContrastParams.amount = params->locallab.spots.at(sp).lcamount; localContrastParams.darkness = params->locallab.spots.at(sp).lcdarkness; localContrastParams.lightness = params->locallab.spots.at(sp).lightness; - bool fftwlc = false; - if(params->locallab.spots.at(sp).fftwlc) fftwlc = true; + bool fftwlc = false; + if(!lp.ftwlc){ ImProcFunctions::localContrast(tmp1.get(), tmp1->L, localContrastParams, fftwlc, sk); - + } else { + std::unique_ptr tmpfftw(new LabImage(bfwr, bfhr)); +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif + for (int y = 0; y < bfhr; y++) { + for (int x = 0; x < bfwr; x++) { + tmpfftw->L[y][x] = tmp1->L[y][x]; + tmpfftw->a[y][x] = tmp1->a[y][x]; + tmpfftw->b[y][x] = tmp1->b[y][x]; + } + } + fftwlc = true; + ImProcFunctions::localContrast(tmpfftw.get(), tmpfftw->L, localContrastParams, fftwlc, sk); +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic,16) +#endif + for (int y = 0; y < bfhr; y++) { + for (int x = 0; x < bfwr; x++) { + tmp1->L[y][x] = tmpfftw->L[y][x]; + tmp1->a[y][x] = tmpfftw->a[y][x]; + tmp1->b[y][x] = tmpfftw->b[y][x]; + } + } + + } float minL = tmp1->L[0][0] - bufgb->L[0][0]; float maxL = minL; #ifdef _OPENMP @@ -6694,6 +6719,51 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o LabImage *buforigmas = nullptr; int bfh = int (lp.ly + lp.lyT) + del; //bfw bfh real size of square zone int bfw = int (lp.lx + lp.lxL) + del; + // printf("before bfh=%i bfw=%i\n", bfh, bfw); + + if(lp.ftwreti) { + int ftsizeH = 1; + int ftsizeW = 1; + + for (int ft=0; ft < N_fftwsize; ft++) {//find best values for FFTW + if(fftw_size[ft] <= bfh) { + ftsizeH = fftw_size[ft]; + break; + } + } + + for (int ft=0; ft < N_fftwsize; ft++) { + if(fftw_size[ft] <= bfw) { + ftsizeW = fftw_size[ft]; + break; + } + } + + int ystart = std::max(static_cast(lp.yc - lp.lyT) - cy, 0); + int xstart = std::max(static_cast(lp.xc - lp.lxL) - cx, 0); + int yend = std::min(static_cast(lp.yc + lp.ly) - cy, original->H); + int xend = std::min(static_cast(lp.xc + lp.lx) - cx, original->W); + + if(ystart == 0 && yend < original->H) lp.ly -= (bfh - ftsizeH); + else if (ystart != 0 && yend == original->H) lp.lyT -= (bfh - ftsizeH); + else if(ystart != 0 && yend != original->H) { + if(lp.ly <= lp.lyT) lp.lyT -= (bfh - ftsizeH); + else lp.ly -= (bfh - ftsizeH); + } + + if(xstart == 0 && xend < original->W) lp.lx -= (bfw - ftsizeW); + else if(xstart != 0 && xend == original->W) lp.lxL -= (bfw - ftsizeW); + else if(xstart != 0 && xend != original->W) { + if(lp.lx <= lp.lxL) lp.lxL -= (bfw - ftsizeW); + else lp.lx -= (bfw - ftsizeW); + } + //new size bfw, bfh not optimized if spot H > high or spot W > width ==> TODO + bfh = int (lp.ly + lp.lyT) + del; + bfw = int (lp.lx + lp.lxL) + del; + //printf("after bfh=%i bfw=%i fftwH=%i fftww=%i\n", bfh, bfw, ftsizeH, ftsizeW); + + } + array2D buflight(bfw, bfh); JaggedArray bufchro(bfw, bfh); diff --git a/rtengine/ipretinex.cc b/rtengine/ipretinex.cc index 1d34aa13d..eae962483 100644 --- a/rtengine/ipretinex.cc +++ b/rtengine/ipretinex.cc @@ -961,9 +961,9 @@ void ImProcFunctions::MSRLocal(int sp, bool fftw, int lum, LabImage * bufreti, L } float *buffer = new float[W_L * H_L]; - float mulradiusfftw = 20.f; + float mulradiusfftw = 40.f; for (int scale = scal - 1; scale >= 0; scale--) { - printf("retscale=%f scale=%i \n", RetinexScales[scale], scale); + // printf("retscale=%f scale=%i \n", mulradiusfftw * RetinexScales[scale], scale); if(!fftw) { #ifdef _OPENMP #pragma omp parallel //disabled with FFTW @@ -973,13 +973,9 @@ void ImProcFunctions::MSRLocal(int sp, bool fftw, int lum, LabImage * bufreti, L if (scale == scal - 1) { gaussianBlur(src, out, W_L, H_L, RetinexScales[scale], buffer); - //ImProcFunctions::fftw_convol_blur2(src, out, W_L, H_L, RetinexScales[scale], 0); } else // reuse result of last iteration { // out was modified in last iteration => restore it - - // ImProcFunctions::fftw_convol_blur2(out, out, W_L, H_L,sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), 0); - gaussianBlur(out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer); } } @@ -990,9 +986,7 @@ void ImProcFunctions::MSRLocal(int sp, bool fftw, int lum, LabImage * bufreti, L } else // reuse result of last iteration { // out was modified in last iteration => restore it - ImProcFunctions::fftw_convol_blur2(out, out, W_L, H_L,sqrtf(SQR(mulradiusfftw * RetinexScales[scale]) - SQR(mulradiusfftw * RetinexScales[scale + 1])), 0); - } }