From db03c16f45446bde9bc52e53f3ed47563912f332 Mon Sep 17 00:00:00 2001 From: Desmis Date: Sat, 16 Dec 2017 15:24:53 +0100 Subject: [PATCH] Add DCT to local chroma denoise --- rtdata/languages/default | 2 + rtengine/dcrop.cc | 2 + rtengine/improccoordinator.cc | 30 +- rtengine/improccoordinator.h | 1 + rtengine/iplocallab.cc | 877 +++++++++++++++++++++++++++++++++- rtengine/procevents.h | 1 + rtengine/procparams.cc | 4 + rtengine/procparams.h | 1 + rtengine/refreshmap.cc | 3 +- rtengine/simpleprocess.cc | 4 +- rtgui/locallab.cc | 26 +- rtgui/locallab.h | 3 +- rtgui/paramsedited.cc | 6 + rtgui/paramsedited.h | 1 + 14 files changed, 943 insertions(+), 18 deletions(-) diff --git a/rtdata/languages/default b/rtdata/languages/default index 6e201e2f6..30c463ec1 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -840,6 +840,7 @@ HISTORY_MSG_590;Local - Spot Excluding scope HISTORY_MSG_591;Local - Spot Excluding struc HISTORY_MSG_592;Local - Warm Cool HISTORY_MSG_593;Local - Noise lum detail +HISTORY_MSG_594;Local - Noise chro detail HISTORY_NEWSNAPSHOT;Add HISTORY_NEWSNAPSHOT_TOOLTIP;Shortcut: Alt-s HISTORY_SNAPSHOT;Snapshot @@ -1856,6 +1857,7 @@ TP_LOCALLAB_NOISELUMCOARSE;Luminance coarse (Wav) TP_LOCALLAB_NOISELUMDETAIL;Luminance detail (DCT) TP_LOCALLAB_NOISECHROFINE;Chroma fine (Wav) TP_LOCALLAB_NOISECHROCOARSE;Chroma coarse (Wav) +TP_LOCALLAB_NOISECHRODETAIL;Chroma detail (DCT) TP_LOCALLAB_QUAL_METHOD;Global quality TP_LOCALLAB_QUALCURV_METHOD;Curves type TP_LOCALLAB_GAM;Gamma diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index 988706c19..d407e25f6 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -1148,6 +1148,7 @@ void Crop::update(int todo) params.locallab.struc = parent->strucs[sp]; params.locallab.warm = parent->warms[sp]; params.locallab.noiselumdetail = parent->noiselumdetails[sp]; + params.locallab.noisechrodetail = parent->noisechrodetails[sp]; std::vector cretie; @@ -1528,6 +1529,7 @@ void Crop::update(int todo) parent->strucs[sp] = params.locallab.struc = parent->strucs[0]; parent->warms[sp] = params.locallab.warm = parent->warms[0]; parent->noiselumdetails[sp] = params.locallab.noiselumdetail = parent->noiselumdetails[0]; + parent->noisechrodetails[sp] = params.locallab.noisechrodetail = parent->noisechrodetails[0]; std::vector ccret; diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index 6b666c35d..bbc61398b 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -156,6 +156,7 @@ ImProcCoordinator::ImProcCoordinator() noiselumfs(500, -10000), noiselumcs(500, -10000), noiselumdetails(500, -10000), + noisechrodetails(500, -10000), noisechrofs(500, -10000), noisechrocs(500, -10000), mult0s(500, -10000), @@ -858,7 +859,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) }; - int maxdata = 87;//86 10017 //85 10016;// 82 10015//78;//73 for 10011 + int maxdata = 88;//87 10016 //86 10017 //85 10016;// 82 10015//78;//73 for 10011 if (fic0) { //find current version mip @@ -902,7 +903,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) //initilize newues when first utilisation of Locallab. Prepare creation of Mip files for (int sp = 1; sp < maxspot; sp++) { // spots default int t_sp = sp; - int t_mipversion = 10018;//new value for each change + int t_mipversion = 10019;//new value for each change int t_circrad = 18; int t_locX = 250; int t_locY = 250; @@ -1025,6 +1026,8 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) int t_warm = 0; //10018 int t_noiselumdetail = 0; + //10019 + int t_noisechrodetail = 0; //all variables except locRETgainCurve 'coomon for all) fic << "Mipversion=" << t_mipversion << '@' << endl; @@ -1119,6 +1122,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) fic << "Struc=" << t_struc << '@' << endl; fic << "Warm=" << t_warm << '@' << endl; fic << "Noiselumdetail=" << t_noiselumdetail << '@' << endl; + fic << "Noisechrodetail=" << t_noisechrodetail << '@' << endl; fic << "curveReti=" << t_curvret << '@' << endl; fic << "curveLL=" << t_curvll << '@' << endl; @@ -1372,6 +1376,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) dataspot[80][0] = strucs[0] = params.locallab.struc; dataspot[81][0] = warms[0] = params.locallab.warm; dataspot[82][0] = noiselumdetails[0] = params.locallab.noiselumdetail; + dataspot[83][0] = noisechrodetails[0] = params.locallab.noisechrodetail; // for all curves work around - I do not know how to do with params curves... //curve Reti local @@ -1662,6 +1667,10 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) maxind = 81; } + if (versionmip == 10018) { + maxind = 82; + } + while (getline(fich, line)) { spotline = line; std::size_t pos = spotline.find("="); @@ -1889,6 +1898,12 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) } } + if (versionmip <= 10018) {// + for (int sp = 1; sp < maxspot; sp++) { // spots default + dataspot[83][sp] = 0; + } + } + //here we change the number of spot if (ns < (maxspot - 1)) { @@ -1897,7 +1912,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) for (int sp = ns + 1 ; sp < maxspot; sp++) { // spots default int t_sp = sp; - int t_mipversion = 10018; + int t_mipversion = 10019; int t_circrad = 18; int t_locX = 250; int t_locY = 250; @@ -2012,6 +2027,8 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) int t_warm = 0; //10018 int t_noiselumdetail = 0; + //10019 + int t_noisechrodetail = 0; fic << "Mipversion=" << t_mipversion << '@' << endl; fic << "Spot=" << t_sp << '@' << endl; @@ -2102,6 +2119,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) fic << "Struc=" << t_struc << '@' << endl; fic << "Warm=" << t_warm << '@' << endl; fic << "Noiselumdetail=" << t_noiselumdetail << '@' << endl; + fic << "Noisechrodetail=" << t_noisechrodetail << '@' << endl; fic << "curveReti=" << t_curvret << '@' << endl; fic << "curveLL=" << t_curvll << '@' << endl; @@ -2452,6 +2470,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) params.locallab.struc = strucs[sp] = dataspot[80][sp]; params.locallab.warm = warms[sp] = dataspot[81][sp]; params.locallab.noiselumdetail = noiselumdetails[sp] = dataspot[82][sp]; + params.locallab.noisechrodetail = noisechrodetails[sp] = dataspot[83][sp]; int *s_datc; @@ -2994,6 +3013,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) dataspot[80][sp] = strucs[sp] = params.locallab.struc = dataspot[80][0]; dataspot[81][sp] = warms[sp] = params.locallab.warm = dataspot[81][0]; dataspot[82][sp] = noiselumdetails[sp] = params.locallab.noiselumdetail = dataspot[82][0]; + dataspot[83][sp] = noisechrodetails[sp] = params.locallab.noisechrodetail = dataspot[83][0]; int *s_datc; @@ -3235,7 +3255,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) for (int spe = 1; spe < maxspot; spe++) { int t_sp = spe; - int t_mipversion = 10018; + int t_mipversion = 10019; int t_circrad = dataspot[2][spe]; int t_locX = dataspot[3][spe]; int t_locY = dataspot[4][spe]; @@ -3324,6 +3344,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) int t_struc = dataspot[80][spe]; int t_warm = dataspot[81][spe]; int t_noiselumdetail = dataspot[82][spe]; + int t_noisechrodetail = dataspot[83][spe]; int t_hueref = dataspot[maxdata - 4][spe]; int t_chromaref = dataspot[maxdata - 3][spe]; @@ -3431,6 +3452,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, Crop* cropCall) fou << "Struc=" << t_struc << '@' << endl; fou << "Warm=" << t_warm << '@' << endl; fou << "Noiselumdetail=" << t_noiselumdetail << '@' << endl; + fou << "Noisechrodetail=" << t_noisechrodetail << '@' << endl; fou << "hueref=" << t_hueref << '@' << endl; fou << "chromaref=" << t_chromaref << '@' << endl; diff --git a/rtengine/improccoordinator.h b/rtengine/improccoordinator.h index 7667a3b70..ce27d1158 100644 --- a/rtengine/improccoordinator.h +++ b/rtengine/improccoordinator.h @@ -307,6 +307,7 @@ protected: LUTi noiselumfs; LUTi noiselumcs; LUTi noiselumdetails; + LUTi noisechrodetails; LUTi noisechrofs; LUTi noisechrocs; LUTi mult0s; diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index 820a06ee7..d23b2b86b 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -144,6 +144,7 @@ struct local_params { int blurmet; float noiself; float noiseldetail; + float noisechrodetail; float noiselc; float noisecf; float noisecc; @@ -379,6 +380,7 @@ static void calcLocalParams(int oW, int oH, const LocallabParams& locallab, stru float local_noiself = locallab.noiselumf; float local_noiselc = locallab.noiselumc; float local_noiseldetail = locallab.noiselumdetail; + float local_noisechrodetail = locallab.noisechrodetail; float local_noisecf = locallab.noisechrof; float local_noisecc = locallab.noisechroc; float multi[5]; @@ -462,6 +464,7 @@ static void calcLocalParams(int oW, int oH, const LocallabParams& locallab, stru lp.thr = thre; lp.noiself = local_noiself; lp.noiseldetail = local_noiseldetail; + lp.noisechrodetail = local_noisechrodetail; lp.noiselc = local_noiselc; lp.noisecf = local_noisecf; lp.noisecc = local_noisecc; @@ -8506,6 +8509,8 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, if (call == 1) { LabImage tmp1(transformed->W, transformed->H); array2D *Lin = nullptr; + array2D *Ain = nullptr; + array2D *Bin = nullptr; int GW = transformed->W; @@ -8517,9 +8522,23 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, // these are needed only for creation of the plans and will be freed before entering the parallel loop fftwf_plan plan_forward_blox[2]; fftwf_plan plan_backward_blox[2]; + + fftwf_plan plan_forward_blox_a[2]; + fftwf_plan plan_backward_blox_a[2]; + + fftwf_plan plan_forward_blox_b[2]; + fftwf_plan plan_backward_blox_b[2]; + + array2D tilemask_in(TS, TS); array2D tilemask_out(TS, TS); + array2D tilemask_ina(TS, TS); + array2D tilemask_outa(TS, TS); + + array2D tilemask_inb(TS, TS); + array2D tilemask_outb(TS, TS); + if ((lp.noiself > 0.1f || lp.noiselc > 0.1f) && levred == 7) { float *Lbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); float *fLbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); @@ -8558,6 +8577,65 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, float *LbloxArray[numThreads]; float *fLbloxArray[numThreads]; + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + float *Abloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + float *fAbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + + float *Bbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + float *fBbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + + int nfwd[2] = {TS, TS}; + + //for DCT: + fftw_r2r_kind fwdkind[2] = {FFTW_REDFT10, FFTW_REDFT10}; + fftw_r2r_kind bwdkind[2] = {FFTW_REDFT01, FFTW_REDFT01}; + + // Creating the plans with FFTW_MEASURE instead of FFTW_ESTIMATE speeds up the execute a bit + plan_forward_blox_a[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Abloxtmp, nullptr, 1, TS * TS, fAbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_a[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fAbloxtmp, nullptr, 1, TS * TS, Abloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_forward_blox_a[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Abloxtmp, nullptr, 1, TS * TS, fAbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_a[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fAbloxtmp, nullptr, 1, TS * TS, Abloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + fftwf_free(Abloxtmp); + fftwf_free(fAbloxtmp); + + plan_forward_blox_b[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Bbloxtmp, nullptr, 1, TS * TS, fBbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_b[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fBbloxtmp, nullptr, 1, TS * TS, Bbloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_forward_blox_b[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Bbloxtmp, nullptr, 1, TS * TS, fBbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_b[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fBbloxtmp, nullptr, 1, TS * TS, Bbloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + fftwf_free(Bbloxtmp); + fftwf_free(fBbloxtmp); + + const int border = MAX(2, TS / 16); + + for (int i = 0; i < TS; ++i) { + float i1 = abs((i > TS / 2 ? i - TS + 1 : i)); + float vmask = (i1 < border ? SQR(sin((rtengine::RT_PI * i1) / (2 * border))) : 1.0f); + float vmask2 = (i1 < 2 * border ? SQR(sin((rtengine::RT_PI * i1) / (2 * border))) : 1.0f); + + for (int j = 0; j < TS; ++j) { + float j1 = abs((j > TS / 2 ? j - TS + 1 : j)); + tilemask_ina[i][j] = (vmask * (j1 < border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + tilemask_outa[i][j] = (vmask2 * (j1 < 2 * border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + + } + + for (int j = 0; j < TS; ++j) { + float j1 = abs((j > TS / 2 ? j - TS + 1 : j)); + tilemask_inb[i][j] = (vmask * (j1 < border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + tilemask_outb[i][j] = (vmask2 * (j1 < 2 * border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + + } + } + + } + + float *AbloxArray[numThreads]; + float *fAbloxArray[numThreads]; + float *BbloxArray[numThreads]; + float *fBbloxArray[numThreads]; + + + for (int ir = 0; ir < GH; ir++) for (int jr = 0; jr < GW; jr++) { tmp1.L[ir][jr] = original->L[ir][jr]; @@ -8921,20 +8999,381 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, fftwf_destroy_plan(plan_backward_blox[0]); fftwf_destroy_plan(plan_forward_blox[1]); fftwf_destroy_plan(plan_backward_blox[1]); - fftwf_cleanup(); + // fftwf_cleanup(); } if (!adecomp.memoryAllocationFailed) { + Ain = new array2D(GW, GH); +#ifdef _OPENMP + #pragma omp parallel for + +#endif + + for (int i = 0; i < GH; ++i) { + for (int j = 0; j < GW; ++j) { + (*Ain)[i][j] = tmp1.a[i][j]; + } + } adecomp.reconstruct(tmp1.a[0]); } + + if (!adecomp.memoryAllocationFailed) { + + const int numblox_W = ceil((static_cast(GW)) / (offset)) + 2 * blkrad; + const int numblox_H = ceil((static_cast(GH)) / (offset)) + 2 * blkrad; + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + //residual between input and denoised L channel + array2D Ldetail(GW, GH, ARRAY2D_CLEAR_DATA); + array2D totwt(GW, GH, ARRAY2D_CLEAR_DATA); //weight for combining DCT blocks + + for (int i = 0; i < numThreads; ++i) { + AbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + fAbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + } + +#ifdef _OPENMP + int masterThread = omp_get_thread_num(); +#endif +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef _OPENMP + int subThread = masterThread * 1 + omp_get_thread_num(); +#else + int subThread = 0; +#endif + float blurbuffer[TS * TS] ALIGNED64; + float *Lblox = AbloxArray[subThread]; + float *fLblox = fAbloxArray[subThread]; + float pBuf[GW + TS + 2 * blkrad * offset] ALIGNED16; + float nbrwt[TS * TS] ALIGNED64; +#ifdef _OPENMP + #pragma omp for +#endif + + for (int vblk = 0; vblk < numblox_H; ++vblk) { + + int top = (vblk - blkrad) * offset; + float * datarow = pBuf + blkrad * offset; + + for (int i = 0; i < TS; ++i) { + int row = top + i; + int rr = row; + + if (row < 0) { + rr = MIN(-row, GH - 1); + } else if (row >= GH) { + rr = MAX(0, 2 * GH - 2 - row); + } + + for (int j = 0; j < tmp1.W; ++j) { + datarow[j] = ((*Ain)[rr][j] - tmp1.a[rr][j]); + } + + for (int j = -blkrad * offset; j < 0; ++j) { + datarow[j] = datarow[MIN(-j, GW - 1)]; + } + + for (int j = GW; j < GW + TS + blkrad * offset; ++j) { + datarow[j] = datarow[MAX(0, 2 * GW - 2 - j)]; + }//now we have a padded data row + + //now fill this row of the blocks with Lab high pass data + for (int hblk = 0; hblk < numblox_W; ++hblk) { + int left = (hblk - blkrad) * offset; + int indx = (hblk) * TS; //index of block in malloc + + if (top + i >= 0 && top + i < GH) { + int j; + + for (j = 0; j < min((-left), TS); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + + for (; j < min(TS, GW - left); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + totwt[top + i][left + j] += tilemask_ina[i][j] * tilemask_outa[i][j]; + } + + for (; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + } else { + for (int j = 0; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + } + + } + + }//end of filling block row + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + //fftwf_print_plan (plan_forward_blox); + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_forward_blox_a[0], Lblox, fLblox); // DCT an entire row of tiles + } else { + fftwf_execute_r2r(plan_forward_blox_a[1], Lblox, fLblox); // DCT an entire row of tiles + } + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + // now process the vblk row of blocks for noise reduction + float params_Ldetail = min(float(lp.noisechrodetail), 99.9f); // max out to avoid div by zero when using noisevar_Ldetail as divisor + float noisevar_Ldetail = SQR(static_cast(SQR(100. - params_Ldetail) + 50.*(100. - params_Ldetail)) * TS * 0.5f); + + + + for (int hblk = 0; hblk < numblox_W; ++hblk) { + ImProcFunctions::RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer); + }//end of horizontal block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + //now perform inverse FT of an entire row of blocks + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_backward_blox_a[0], fLblox, Lblox); //for DCT + } else { + fftwf_execute_r2r(plan_backward_blox_a[1], fLblox, Lblox); //for DCT + } + + int topproc = (vblk - blkrad) * offset; + + //add row of blocks to output image tile + ImProcFunctions::RGBoutput_tile_row(Lblox, Ldetail, tilemask_outa, GH, GW, topproc); + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + }//end of vertical block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + } + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#ifdef _OPENMP + + #pragma omp parallel for +#endif + + for (int i = 0; i < GH; ++i) { + for (int j = 0; j < GW; ++j) { + //may want to include masking threshold for large hipass data to preserve edges/detail + tmp1.a[i][j] += Ldetail[i][j] / totwt[i][j]; //note that labdn initially stores the denoised hipass data + } + } + + } + + delete Ain; + } + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + + for (int i = 0; i < numThreads; ++i) { + fftwf_free(AbloxArray[i]); + fftwf_free(fAbloxArray[i]); + } + + fftwf_destroy_plan(plan_forward_blox_a[0]); + fftwf_destroy_plan(plan_backward_blox_a[0]); + fftwf_destroy_plan(plan_forward_blox_a[1]); + fftwf_destroy_plan(plan_backward_blox_a[1]); + //fftwf_cleanup(); + + } + + + if (!bdecomp.memoryAllocationFailed) { + Bin = new array2D(GW, GH); +#ifdef _OPENMP + #pragma omp parallel for + +#endif + + for (int i = 0; i < GH; ++i) { + for (int j = 0; j < GW; ++j) { + (*Bin)[i][j] = tmp1.b[i][j]; + } + } bdecomp.reconstruct(tmp1.b[0]); } + + if (!bdecomp.memoryAllocationFailed) { + + const int numblox_W = ceil((static_cast(GW)) / (offset)) + 2 * blkrad; + const int numblox_H = ceil((static_cast(GH)) / (offset)) + 2 * blkrad; + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + //residual between input and denoised L channel + array2D Ldetail(GW, GH, ARRAY2D_CLEAR_DATA); + array2D totwt(GW, GH, ARRAY2D_CLEAR_DATA); //weight for combining DCT blocks + + for (int i = 0; i < numThreads; ++i) { + BbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + fBbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + } + +#ifdef _OPENMP + int masterThread = omp_get_thread_num(); +#endif +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef _OPENMP + int subThread = masterThread * 1 + omp_get_thread_num(); +#else + int subThread = 0; +#endif + float blurbuffer[TS * TS] ALIGNED64; + float *Lblox = BbloxArray[subThread]; + float *fLblox = fBbloxArray[subThread]; + float pBuf[GW + TS + 2 * blkrad * offset] ALIGNED16; + float nbrwt[TS * TS] ALIGNED64; +#ifdef _OPENMP + #pragma omp for +#endif + + for (int vblk = 0; vblk < numblox_H; ++vblk) { + + int top = (vblk - blkrad) * offset; + float * datarow = pBuf + blkrad * offset; + + for (int i = 0; i < TS; ++i) { + int row = top + i; + int rr = row; + + if (row < 0) { + rr = MIN(-row, GH - 1); + } else if (row >= GH) { + rr = MAX(0, 2 * GH - 2 - row); + } + + for (int j = 0; j < tmp1.W; ++j) { + datarow[j] = ((*Bin)[rr][j] - tmp1.b[rr][j]); + } + + for (int j = -blkrad * offset; j < 0; ++j) { + datarow[j] = datarow[MIN(-j, GW - 1)]; + } + + for (int j = GW; j < GW + TS + blkrad * offset; ++j) { + datarow[j] = datarow[MAX(0, 2 * GW - 2 - j)]; + }//now we have a padded data row + + //now fill this row of the blocks with Lab high pass data + for (int hblk = 0; hblk < numblox_W; ++hblk) { + int left = (hblk - blkrad) * offset; + int indx = (hblk) * TS; //index of block in malloc + + if (top + i >= 0 && top + i < GH) { + int j; + + for (j = 0; j < min((-left), TS); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + + for (; j < min(TS, GW - left); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + totwt[top + i][left + j] += tilemask_inb[i][j] * tilemask_outb[i][j]; + } + + for (; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + } else { + for (int j = 0; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + } + + } + + }//end of filling block row + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + //fftwf_print_plan (plan_forward_blox); + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_forward_blox_b[0], Lblox, fLblox); // DCT an entire row of tiles + } else { + fftwf_execute_r2r(plan_forward_blox_b[1], Lblox, fLblox); // DCT an entire row of tiles + } + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + // now process the vblk row of blocks for noise reduction + float params_Ldetail = min(float(lp.noisechrodetail), 99.9f); // max out to avoid div by zero when using noisevar_Ldetail as divisor + float noisevar_Ldetail = SQR(static_cast(SQR(100. - params_Ldetail) + 50.*(100. - params_Ldetail)) * TS * 0.5f); + + + + for (int hblk = 0; hblk < numblox_W; ++hblk) { + ImProcFunctions::RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer); + }//end of horizontal block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + //now perform inverse FT of an entire row of blocks + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_backward_blox_b[0], fLblox, Lblox); //for DCT + } else { + fftwf_execute_r2r(plan_backward_blox_b[1], fLblox, Lblox); //for DCT + } + + int topproc = (vblk - blkrad) * offset; + + //add row of blocks to output image tile + ImProcFunctions::RGBoutput_tile_row(Lblox, Ldetail, tilemask_outb, GH, GW, topproc); + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + }//end of vertical block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + } + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#ifdef _OPENMP + + #pragma omp parallel for +#endif + + for (int i = 0; i < GH; ++i) { + for (int j = 0; j < GW; ++j) { + //may want to include masking threshold for large hipass data to preserve edges/detail + tmp1.b[i][j] += Ldetail[i][j] / totwt[i][j]; //note that labdn initially stores the denoised hipass data + } + } + + } + + delete Bin; + } + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + + for (int i = 0; i < numThreads; ++i) { + fftwf_free(BbloxArray[i]); + fftwf_free(fBbloxArray[i]); + } + + fftwf_destroy_plan(plan_forward_blox_b[0]); + fftwf_destroy_plan(plan_backward_blox_b[0]); + fftwf_destroy_plan(plan_forward_blox_b[1]); + fftwf_destroy_plan(plan_backward_blox_b[1]); + // fftwf_cleanup(); + + } + + fftwf_cleanup(); + + DeNoise_Local(call, lp, original, transformed, tmp1, cx, cy); } else if (call == 2) { //simpleprocess @@ -8944,15 +9383,31 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, LabImage bufwv(bfw, bfh); bufwv.clear(true); array2D *Lin = nullptr; + array2D *Ain = nullptr; + array2D *Bin = nullptr; + int max_numblox_W = ceil((static_cast(bfw)) / (offset)) + 2 * blkrad; // calculate min size of numblox_W. int min_numblox_W = ceil((static_cast(bfw)) / (offset)) + 2 * blkrad; // these are needed only for creation of the plans and will be freed before entering the parallel loop fftwf_plan plan_forward_blox[2]; fftwf_plan plan_backward_blox[2]; + + fftwf_plan plan_forward_blox_a[2]; + fftwf_plan plan_backward_blox_a[2]; + + fftwf_plan plan_forward_blox_b[2]; + fftwf_plan plan_backward_blox_b[2]; + array2D tilemask_in(TS, TS); array2D tilemask_out(TS, TS); + array2D tilemask_ina(TS, TS); + array2D tilemask_outa(TS, TS); + + array2D tilemask_inb(TS, TS); + array2D tilemask_outb(TS, TS); + if ((lp.noiself > 0.1f || lp.noiselc > 0.1f) && levred == 7) { float *Lbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); float *fLbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); @@ -8982,7 +9437,6 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, float j1 = abs((j > TS / 2 ? j - TS + 1 : j)); tilemask_in[i][j] = (vmask * (j1 < border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; tilemask_out[i][j] = (vmask2 * (j1 < 2 * border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; - } } @@ -8991,6 +9445,63 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, float *LbloxArray[numThreads]; float *fLbloxArray[numThreads]; + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + float *Abloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + float *fAbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + + float *Bbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + float *fBbloxtmp = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + + int nfwd[2] = {TS, TS}; + + //for DCT: + fftw_r2r_kind fwdkind[2] = {FFTW_REDFT10, FFTW_REDFT10}; + fftw_r2r_kind bwdkind[2] = {FFTW_REDFT01, FFTW_REDFT01}; + + // Creating the plans with FFTW_MEASURE instead of FFTW_ESTIMATE speeds up the execute a bit + plan_forward_blox_a[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Abloxtmp, nullptr, 1, TS * TS, fAbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_a[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fAbloxtmp, nullptr, 1, TS * TS, Abloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_forward_blox_a[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Abloxtmp, nullptr, 1, TS * TS, fAbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_a[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fAbloxtmp, nullptr, 1, TS * TS, Abloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + fftwf_free(Abloxtmp); + fftwf_free(fAbloxtmp); + + plan_forward_blox_b[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Bbloxtmp, nullptr, 1, TS * TS, fBbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_b[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fBbloxtmp, nullptr, 1, TS * TS, Bbloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_forward_blox_b[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Bbloxtmp, nullptr, 1, TS * TS, fBbloxtmp, nullptr, 1, TS * TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + plan_backward_blox_b[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fBbloxtmp, nullptr, 1, TS * TS, Bbloxtmp, nullptr, 1, TS * TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT); + fftwf_free(Bbloxtmp); + fftwf_free(fBbloxtmp); + + const int border = MAX(2, TS / 16); + + for (int i = 0; i < TS; ++i) { + float i1 = abs((i > TS / 2 ? i - TS + 1 : i)); + float vmask = (i1 < border ? SQR(sin((rtengine::RT_PI * i1) / (2 * border))) : 1.0f); + float vmask2 = (i1 < 2 * border ? SQR(sin((rtengine::RT_PI * i1) / (2 * border))) : 1.0f); + + for (int j = 0; j < TS; ++j) { + float j1 = abs((j > TS / 2 ? j - TS + 1 : j)); + tilemask_ina[i][j] = (vmask * (j1 < border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + tilemask_outa[i][j] = (vmask2 * (j1 < 2 * border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + + } + + for (int j = 0; j < TS; ++j) { + float j1 = abs((j > TS / 2 ? j - TS + 1 : j)); + tilemask_inb[i][j] = (vmask * (j1 < border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + tilemask_outb[i][j] = (vmask2 * (j1 < 2 * border ? SQR(sin((rtengine::RT_PI * j1) / (2 * border))) : 1.0f)) + epsilon; + + } + } + + } + + float *AbloxArray[numThreads]; + float *fAbloxArray[numThreads]; + + float *BbloxArray[numThreads]; + float *fBbloxArray[numThreads]; int begy = lp.yc - lp.lyT; int begx = lp.xc - lp.lxL; @@ -9180,13 +9691,10 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, } } - - float noisevarab_r = 100.f; //SQR(lp.noisecc / 10.0); WaveletDenoiseAllAB(Ldecomp, adecomp, noisevarchrom, madL, variC, edge, noisevarab_r, true, false, false, numThreads); WaveletDenoiseAllAB(Ldecomp, bdecomp, noisevarchrom, madL, variC, edge, noisevarab_r, true, false, false, numThreads); delete[] noisevarchrom; - } } @@ -9371,20 +9879,377 @@ void ImProcFunctions::Lab_Local(int call, float** shbuffer, LabImage * original, fftwf_destroy_plan(plan_backward_blox[0]); fftwf_destroy_plan(plan_forward_blox[1]); fftwf_destroy_plan(plan_backward_blox[1]); - fftwf_cleanup(); + // fftwf_cleanup(); } if (!adecomp.memoryAllocationFailed) { + Ain = new array2D(bfw, bfh); +#ifdef _OPENMP + #pragma omp parallel for + +#endif + + for (int i = 0; i < bfh; ++i) { + for (int j = 0; j < bfw; ++j) { + (*Ain)[i][j] = bufwv.a[i][j]; + } + } adecomp.reconstruct(bufwv.a[0]); } + if (!adecomp.memoryAllocationFailed) { + + const int numblox_W = ceil((static_cast(bfw)) / (offset)) + 2 * blkrad; + const int numblox_H = ceil((static_cast(bfh)) / (offset)) + 2 * blkrad; + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + //residual between input and denoised a channel + array2D Ldetail(bfw, bfh, ARRAY2D_CLEAR_DATA); + array2D totwt(bfw, bfh, ARRAY2D_CLEAR_DATA); //weight for combining DCT blocks + + for (int i = 0; i < numThreads; ++i) { + AbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + fAbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + } + +#ifdef _OPENMP + int masterThread = omp_get_thread_num(); +#endif +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef _OPENMP + int subThread = masterThread * 1 + omp_get_thread_num(); +#else + int subThread = 0; +#endif + float blurbuffer[TS * TS] ALIGNED64; + float *Lblox = AbloxArray[subThread]; + float *fLblox = fAbloxArray[subThread]; + float pBuf[bfw + TS + 2 * blkrad * offset] ALIGNED16; + float nbrwt[TS * TS] ALIGNED64; +#ifdef _OPENMP + #pragma omp for +#endif + + for (int vblk = 0; vblk < numblox_H; ++vblk) { + + int top = (vblk - blkrad) * offset; + float * datarow = pBuf + blkrad * offset; + + for (int i = 0; i < TS; ++i) { + int row = top + i; + int rr = row; + + if (row < 0) { + rr = MIN(-row, bfh - 1); + } else if (row >= bfh) { + rr = MAX(0, 2 * bfh - 2 - row); + } + + for (int j = 0; j < bufwv.W; ++j) { + datarow[j] = ((*Ain)[rr][j] - bufwv.a[rr][j]); + } + + for (int j = -blkrad * offset; j < 0; ++j) { + datarow[j] = datarow[MIN(-j, bfw - 1)]; + } + + for (int j = bfw; j < bfw + TS + blkrad * offset; ++j) { + datarow[j] = datarow[MAX(0, 2 * bfw - 2 - j)]; + }//now we have a padded data row + + //now fill this row of the blocks with Lab high pass data + for (int hblk = 0; hblk < numblox_W; ++hblk) { + int left = (hblk - blkrad) * offset; + int indx = (hblk) * TS; //index of block in malloc + + if (top + i >= 0 && top + i < bfh) { + int j; + + for (j = 0; j < min((-left), TS); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + + for (; j < min(TS, bfw - left); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + totwt[top + i][left + j] += tilemask_ina[i][j] * tilemask_outa[i][j]; + } + + for (; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + } else { + for (int j = 0; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_ina[i][j] * datarow[left + j]; // luma data + } + } + + } + + }//end of filling block row + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + //fftwf_print_plan (plan_forward_blox); + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_forward_blox_a[0], Lblox, fLblox); // DCT an entire row of tiles + } else { + fftwf_execute_r2r(plan_forward_blox_a[1], Lblox, fLblox); // DCT an entire row of tiles + } + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + // now process the vblk row of blocks for noise reduction + float params_Ldetail = min(float(lp.noisechrodetail), 99.9f); // max out to avoid div by zero when using noisevar_Ldetail as divisor + float noisevar_Ldetail = SQR(static_cast(SQR(100. - params_Ldetail) + 50.*(100. - params_Ldetail)) * TS * 0.5f); + + + + for (int hblk = 0; hblk < numblox_W; ++hblk) { + ImProcFunctions::RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer); + }//end of horizontal block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + //now perform inverse FT of an entire row of blocks + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_backward_blox_a[0], fLblox, Lblox); //for DCT + } else { + fftwf_execute_r2r(plan_backward_blox_a[1], fLblox, Lblox); //for DCT + } + + int topproc = (vblk - blkrad) * offset; + + //add row of blocks to output image tile + ImProcFunctions::RGBoutput_tile_row(Lblox, Ldetail, tilemask_outa, bfh, bfw, topproc); + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + }//end of vertical block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + } + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#ifdef _OPENMP + + #pragma omp parallel for +#endif + + for (int i = 0; i < bfh; ++i) { + for (int j = 0; j < bfw; ++j) { + //may want to include masking threshold for large hipass data to preserve edges/detail + bufwv.a[i][j] += Ldetail[i][j] / totwt[i][j]; //note that labdn initially stores the denoised hipass data + } + } + + } + + delete Ain; + } + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + + for (int i = 0; i < numThreads; ++i) { + fftwf_free(AbloxArray[i]); + fftwf_free(fAbloxArray[i]); + } + + fftwf_destroy_plan(plan_forward_blox_a[0]); + fftwf_destroy_plan(plan_backward_blox_a[0]); + fftwf_destroy_plan(plan_forward_blox_a[1]); + fftwf_destroy_plan(plan_backward_blox_a[1]); + // fftwf_cleanup(); + + } + if (!bdecomp.memoryAllocationFailed) { + Bin = new array2D(bfw, bfh); +#ifdef _OPENMP + #pragma omp parallel for + +#endif + + for (int i = 0; i < bfh; ++i) { + for (int j = 0; j < bfw; ++j) { + (*Bin)[i][j] = bufwv.b[i][j]; + } + } + bdecomp.reconstruct(bufwv.b[0]); } + if (!bdecomp.memoryAllocationFailed) { + + const int numblox_W = ceil((static_cast(bfw)) / (offset)) + 2 * blkrad; + const int numblox_H = ceil((static_cast(bfh)) / (offset)) + 2 * blkrad; + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + //residual between input and denoised b channel + array2D Ldetail(bfw, bfh, ARRAY2D_CLEAR_DATA); + array2D totwt(bfw, bfh, ARRAY2D_CLEAR_DATA); //weight for combining DCT blocks + + for (int i = 0; i < numThreads; ++i) { + BbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + fBbloxArray[i] = reinterpret_cast(fftwf_malloc(max_numblox_W * TS * TS * sizeof(float))); + } + +#ifdef _OPENMP + int masterThread = omp_get_thread_num(); +#endif +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef _OPENMP + int subThread = masterThread * 1 + omp_get_thread_num(); +#else + int subThread = 0; +#endif + float blurbuffer[TS * TS] ALIGNED64; + float *Lblox = BbloxArray[subThread]; + float *fLblox = fBbloxArray[subThread]; + float pBuf[bfw + TS + 2 * blkrad * offset] ALIGNED16; + float nbrwt[TS * TS] ALIGNED64; +#ifdef _OPENMP + #pragma omp for +#endif + + for (int vblk = 0; vblk < numblox_H; ++vblk) { + + int top = (vblk - blkrad) * offset; + float * datarow = pBuf + blkrad * offset; + + for (int i = 0; i < TS; ++i) { + int row = top + i; + int rr = row; + + if (row < 0) { + rr = MIN(-row, bfh - 1); + } else if (row >= bfh) { + rr = MAX(0, 2 * bfh - 2 - row); + } + + for (int j = 0; j < bufwv.W; ++j) { + datarow[j] = ((*Bin)[rr][j] - bufwv.b[rr][j]); + } + + for (int j = -blkrad * offset; j < 0; ++j) { + datarow[j] = datarow[MIN(-j, bfw - 1)]; + } + + for (int j = bfw; j < bfw + TS + blkrad * offset; ++j) { + datarow[j] = datarow[MAX(0, 2 * bfw - 2 - j)]; + }//now we have a padded data row + + //now fill this row of the blocks with Lab high pass data + for (int hblk = 0; hblk < numblox_W; ++hblk) { + int left = (hblk - blkrad) * offset; + int indx = (hblk) * TS; //index of block in malloc + + if (top + i >= 0 && top + i < bfh) { + int j; + + for (j = 0; j < min((-left), TS); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + + for (; j < min(TS, bfw - left); ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + totwt[top + i][left + j] += tilemask_inb[i][j] * tilemask_outb[i][j]; + } + + for (; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + } else { + for (int j = 0; j < TS; ++j) { + Lblox[(indx + i)*TS + j] = tilemask_inb[i][j] * datarow[left + j]; // luma data + } + } + + } + + }//end of filling block row + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + //fftwf_print_plan (plan_forward_blox); + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_forward_blox_b[0], Lblox, fLblox); // DCT an entire row of tiles + } else { + fftwf_execute_r2r(plan_forward_blox_b[1], Lblox, fLblox); // DCT an entire row of tiles + } + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + // now process the vblk row of blocks for noise reduction + float params_Ldetail = min(float(lp.noisechrodetail), 99.9f); // max out to avoid div by zero when using noisevar_Ldetail as divisor + float noisevar_Ldetail = SQR(static_cast(SQR(100. - params_Ldetail) + 50.*(100. - params_Ldetail)) * TS * 0.5f); + + + + for (int hblk = 0; hblk < numblox_W; ++hblk) { + ImProcFunctions::RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer); + }//end of horizontal block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + //now perform inverse FT of an entire row of blocks + if (numblox_W == max_numblox_W) { + fftwf_execute_r2r(plan_backward_blox_b[0], fLblox, Lblox); //for DCT + } else { + fftwf_execute_r2r(plan_backward_blox_b[1], fLblox, Lblox); //for DCT + } + + int topproc = (vblk - blkrad) * offset; + + //add row of blocks to output image tile + ImProcFunctions::RGBoutput_tile_row(Lblox, Ldetail, tilemask_outb, bfh, bfw, topproc); + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + }//end of vertical block loop + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + } + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +#ifdef _OPENMP + + #pragma omp parallel for +#endif + + for (int i = 0; i < bfh; ++i) { + for (int j = 0; j < bfw; ++j) { + //may want to include masking threshold for large hipass data to preserve edges/detail + bufwv.b[i][j] += Ldetail[i][j] / totwt[i][j]; //note that labdn initially stores the denoised hipass data + } + } + + } + + delete Bin; + } + + if ((lp.noisecf > 0.1f || lp.noisecc > 0.1f)) { + + for (int i = 0; i < numThreads; ++i) { + fftwf_free(BbloxArray[i]); + fftwf_free(fBbloxArray[i]); + } + + fftwf_destroy_plan(plan_forward_blox_b[0]); + fftwf_destroy_plan(plan_backward_blox_b[0]); + fftwf_destroy_plan(plan_forward_blox_b[1]); + fftwf_destroy_plan(plan_backward_blox_b[1]); + // fftwf_cleanup(); + + } + + fftwf_cleanup(); + DeNoise_Local(call, lp, original, transformed, bufwv, cx, cy); } diff --git a/rtengine/procevents.h b/rtengine/procevents.h index 75e150483..8b0146b73 100644 --- a/rtengine/procevents.h +++ b/rtengine/procevents.h @@ -620,6 +620,7 @@ enum ProcEvent { Evlocallabstruc = 590, Evlocallabwarm = 591, Evlocallabnoiselumdetail = 592, + Evlocallabnoisechrodetail = 593, NUMOFEVENTS diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index 1745a5aba..b2db01b33 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -2375,6 +2375,7 @@ sensiv(19), noiselumf(0), noiselumc(0), noiselumdetail(0), +noisechrodetail(0), noisechrof(0), noisechroc(0), sharradius(40), @@ -2508,6 +2509,7 @@ bool LocallabParams::operator ==(const LocallabParams& other) const && noiselumf == other.noiselumf && noiselumc == other.noiselumc && noiselumdetail == other.noiselumdetail + && noisechrodetail == other.noisechrodetail && noisechrof == other.noisechrof && noisechroc == other.noisechroc && sharradius == other.sharradius @@ -3483,6 +3485,7 @@ int ProcParams::save(const Glib::ustring& fname, const Glib::ustring& fname2, bo saveToKeyfile(!pedited || pedited->locallab.noiselumf, "Locallab", "noiselumf", locallab.noiselumf, keyFile); saveToKeyfile(!pedited || pedited->locallab.noiselumc, "Locallab", "noiselumc", locallab.noiselumc, keyFile); saveToKeyfile(!pedited || pedited->locallab.noiselumdetail, "Locallab", "noiselumdetail", locallab.noiselumdetail, keyFile); + saveToKeyfile(!pedited || pedited->locallab.noisechrodetail, "Locallab", "noisechrodetail", locallab.noisechrodetail, keyFile); saveToKeyfile(!pedited || pedited->locallab.noisechrof, "Locallab", "noisechrof", locallab.noisechrof, keyFile); saveToKeyfile(!pedited || pedited->locallab.noisechroc, "Locallab", "noisechroc", locallab.noisechroc, keyFile); saveToKeyfile(!pedited || pedited->locallab.sharradius, "Locallab", "Sharradius", locallab.sharradius, keyFile); @@ -4537,6 +4540,7 @@ int ProcParams::load(const Glib::ustring& fname, ParamsEdited* pedited) assignFromKeyfile(keyFile, "Locallab", "noiselumf", pedited, locallab.noiselumf, pedited->locallab.noiselumf); assignFromKeyfile(keyFile, "Locallab", "noiselumc", pedited, locallab.noiselumc, pedited->locallab.noiselumc); assignFromKeyfile(keyFile, "Locallab", "noiselumdetail", pedited, locallab.noiselumdetail, pedited->locallab.noiselumdetail); + assignFromKeyfile(keyFile, "Locallab", "noisechrodetail", pedited, locallab.noisechrodetail, pedited->locallab.noisechrodetail); assignFromKeyfile(keyFile, "Locallab", "noisechrof", pedited, locallab.noisechrof, pedited->locallab.noisechrof); assignFromKeyfile(keyFile, "Locallab", "noisechroc", pedited, locallab.noisechroc, pedited->locallab.noisechroc); assignFromKeyfile(keyFile, "Locallab", "Sharradius", pedited, locallab.sharradius, pedited->locallab.sharradius); diff --git a/rtengine/procparams.h b/rtengine/procparams.h index 85f56de64..8193319c0 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -915,6 +915,7 @@ struct LocallabParams { int noiselumf; int noiselumc; int noiselumdetail; + int noisechrodetail; int noisechrof; int noisechroc; int sharradius; diff --git a/rtengine/refreshmap.cc b/rtengine/refreshmap.cc index ba24c7314..9010dd19f 100644 --- a/rtengine/refreshmap.cc +++ b/rtengine/refreshmap.cc @@ -619,7 +619,8 @@ int refreshmap[rtengine::NUMOFEVENTS] = { LUMINANCECURVE, // Evlocallabsensiexclu LUMINANCECURVE, // Evlocallabstruc LUMINANCECURVE, // Evlocallabwarm - LUMINANCECURVE // Evlocallabnoiselumdetail + LUMINANCECURVE, // Evlocallabnoiselumdetail + LUMINANCECURVE // Evlocallabnoisechrodetail }; diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 3fcfe7e9e..c38250b56 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -1103,7 +1103,7 @@ private: } ifstream fich(datalab, ios::in); - int maxdata = 87; //86 10017//85 10016 //82;//78;//73 10011 + int maxdata = 88; //87 10018//86 10017//85 10016 //82;//78;//73 10011 if (fich && versionmip != 0) { std::string inser; @@ -1339,6 +1339,7 @@ private: dataspots[80][0] = params.locallab.struc; dataspots[81][0] = params.locallab.warm; dataspots[82][0] = params.locallab.noiselumdetail; + dataspots[83][0] = params.locallab.noisechrodetail; dataspots[maxdata - 4][0] = 100.f * params.locallab.hueref; dataspots[maxdata - 3][0] = params.locallab.chromaref; @@ -1809,6 +1810,7 @@ private: params.locallab.struc = dataspots[80][sp]; params.locallab.warm = dataspots[81][sp]; params.locallab.noiselumdetail = dataspots[82][sp]; + params.locallab.noisechrodetail = dataspots[83][sp]; params.locallab.hueref = ((float) dataspots[maxdata - 4][sp]) / 100.f; params.locallab.chromaref = dataspots[maxdata - 3][sp]; diff --git a/rtgui/locallab.cc b/rtgui/locallab.cc index 82b1247ee..25d228cc8 100644 --- a/rtgui/locallab.cc +++ b/rtgui/locallab.cc @@ -107,6 +107,7 @@ Locallab::Locallab(): noiselumf(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISELUMFINE"), 0, 100, 1, 0))), noiselumc(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISELUMCOARSE"), 0, 100, 1, 0))), noiselumdetail(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISELUMDETAIL"), 0, 100, 1, 50))), + noisechrodetail(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISECHRODETAIL"), 0, 100, 1, 50))), noisechrof(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISECHROFINE"), 0, 100, 1, 0))), noisechroc(Gtk::manage(new Adjuster(M("TP_LOCALLAB_NOISECHROCOARSE"), 0, 100, 1, 0))), hueref(Gtk::manage(new Adjuster(M("TP_LOCALLAB_HUEREF"), -3.15, 3.15, 0.01, 0))), @@ -642,6 +643,7 @@ Locallab::Locallab(): noiselumc->setAdjusterListener(this); noiselumdetail->setAdjusterListener(this); + noisechrodetail->setAdjusterListener(this); noisechrof->setAdjusterListener(this); @@ -653,6 +655,7 @@ Locallab::Locallab(): denoisBox->pack_start(*noiselumdetail); denoisBox->pack_start(*noisechrof); denoisBox->pack_start(*noisechroc); + denoisBox->pack_start(*noisechrodetail); neutrHBox1 = Gtk::manage(new Gtk::HBox()); @@ -1307,6 +1310,7 @@ void Locallab::neutral_pressed() noiselumdetail->resetValue(false); noisechrof->resetValue(false); noisechroc->resetValue(false); + noisechrodetail->resetValue(false); } @@ -1811,12 +1815,13 @@ bool Locallab::localComputed_() struc->setValue(nextdatasp[80]); warm->setValue(nextdatasp[81]); noiselumdetail->setValue(nextdatasp[82]); + noisechrodetail->setValue(nextdatasp[83]); - double intermed = 0.01 * (double) nextdatasp[83]; + double intermed = 0.01 * (double) nextdatasp[84]; hueref->setValue(intermed); - chromaref->setValue(nextdatasp[84]); - lumaref->setValue(nextdatasp[85]); - sobelref->setValue(nextdatasp[86]); + chromaref->setValue(nextdatasp[85]); + lumaref->setValue(nextdatasp[86]); + sobelref->setValue(nextdatasp[87]); int *s_datc; s_datc = new int[70]; @@ -2100,7 +2105,7 @@ bool Locallab::localComputed_() void Locallab::localChanged(int **datasp, std::string datastr, std::string ll_str, std::string lh_str, std::string cc_str, std::string hh_str, std::string sk_str, std::string ps_str, std::string ex_str, int sp, int maxdat) { - for (int i = 2; i < 87; i++) { + for (int i = 2; i < 88; i++) { nextdatasp[i] = datasp[i][sp]; } @@ -2189,6 +2194,7 @@ void Locallab::read(const ProcParams* pp, const ParamsEdited* pedited) noiselumdetail->setEditedState(pedited->locallab.noiselumdetail ? Edited : UnEdited); noisechrof->setEditedState(pedited->locallab.noisechrof ? Edited : UnEdited); noisechroc->setEditedState(pedited->locallab.noisechroc ? Edited : UnEdited); + noisechrodetail->setEditedState(pedited->locallab.noisechrodetail ? Edited : UnEdited); pastels->setEditedState(pedited->locallab.pastels ? Edited : UnEdited); saturated->setEditedState(pedited->locallab.saturated ? Edited : UnEdited); @@ -2399,6 +2405,7 @@ void Locallab::read(const ProcParams* pp, const ParamsEdited* pedited) noiselumdetail->setValue(pp->locallab.noiselumdetail); noisechrof->setValue(pp->locallab.noisechrof); noisechroc->setValue(pp->locallab.noisechroc); + noisechrodetail->setValue(pp->locallab.noisechrodetail); expcolor->setEnabled(pp->locallab.expcolor); expexpose->setEnabled(pp->locallab.expexpose); expvibrance->setEnabled(pp->locallab.expvibrance); @@ -2823,6 +2830,7 @@ void Locallab::write(ProcParams* pp, ParamsEdited* pedited) pp->locallab.shcompr = (int)shcompr->getValue(); pp->locallab.noiselumc = noiselumc->getIntValue(); pp->locallab.noiselumdetail = noiselumdetail->getIntValue(); + pp->locallab.noisechrodetail = noisechrodetail->getIntValue(); pp->locallab.noiselumf = noiselumf->getIntValue(); pp->locallab.noisechrof = noisechrof->getIntValue(); pp->locallab.noisechroc = noisechroc->getIntValue(); @@ -2936,6 +2944,7 @@ void Locallab::write(ProcParams* pp, ParamsEdited* pedited) pedited->locallab.noiselumf = noiselumf->getEditedState(); pedited->locallab.noiselumc = noiselumc->getEditedState(); pedited->locallab.noiselumdetail = noiselumdetail->getEditedState(); + pedited->locallab.noisechrodetail = noisechrodetail->getEditedState(); pedited->locallab.noisechrof = noisechrof->getEditedState(); pedited->locallab.noisechroc = noisechroc->getEditedState(); pedited->locallab.sharradius = sharradius->getEditedState(); @@ -3708,6 +3717,7 @@ void Locallab::setDefaults(const ProcParams * defParams, const ParamsEdited * pe noiselumf->setDefault(defParams->locallab.noiselumf); noiselumc->setDefault(defParams->locallab.noiselumc); noiselumdetail->setDefault(defParams->locallab.noiselumdetail); + noisechrodetail->setDefault(defParams->locallab.noisechrodetail); noisechrof->setDefault(defParams->locallab.noisechrof); noisechroc->setDefault(defParams->locallab.noisechroc); sharradius->setDefault(defParams->locallab.sharradius); @@ -3783,6 +3793,7 @@ void Locallab::setDefaults(const ProcParams * defParams, const ParamsEdited * pe noiselumf->setDefaultEditedState(pedited->locallab.noiselumf ? Edited : UnEdited); noiselumc->setDefaultEditedState(pedited->locallab.noiselumc ? Edited : UnEdited); noiselumdetail->setDefaultEditedState(pedited->locallab.noiselumdetail ? Edited : UnEdited); + noisechrodetail->setDefaultEditedState(pedited->locallab.noisechrodetail ? Edited : UnEdited); noisechrof->setDefaultEditedState(pedited->locallab.noisechrof ? Edited : UnEdited); noisechroc->setDefaultEditedState(pedited->locallab.noisechroc ? Edited : UnEdited); sharradius->setDefaultEditedState(pedited->locallab.sharradius ? Edited : UnEdited); @@ -3857,6 +3868,7 @@ void Locallab::setDefaults(const ProcParams * defParams, const ParamsEdited * pe noiselumf->setDefaultEditedState(Irrelevant); noiselumc->setDefaultEditedState(Irrelevant); noiselumdetail->setDefaultEditedState(Irrelevant); + noisechrodetail->setDefaultEditedState(Irrelevant); noisechrof->setDefaultEditedState(Irrelevant); noisechroc->setDefaultEditedState(Irrelevant); sharradius->setDefaultEditedState(Irrelevant); @@ -4032,6 +4044,8 @@ void Locallab::adjusterChanged(Adjuster * a, double newval) listener->panelChanged(Evlocallabnoiselumc, noiselumc->getTextValue()); } else if (a == noiselumdetail) { listener->panelChanged(Evlocallabnoiselumdetail, noiselumdetail->getTextValue()); + } else if (a == noisechrodetail) { + listener->panelChanged(Evlocallabnoisechrodetail, noisechrodetail->getTextValue()); } else if (a == noisechrof) { listener->panelChanged(Evlocallabnoisechrof, noisechrof->getTextValue()); } else if (a == noisechroc) { @@ -4219,6 +4233,7 @@ void Locallab::trimValues(rtengine::procparams::ProcParams * pp) noiselumf->trimValue(pp->locallab.noiselumf); noiselumc->trimValue(pp->locallab.noiselumc); noiselumdetail->trimValue(pp->locallab.noiselumdetail); + noisechrodetail->trimValue(pp->locallab.noisechrodetail); noisechrof->trimValue(pp->locallab.noisechrof); noisechroc->trimValue(pp->locallab.noisechroc); sharradius->trimValue(pp->locallab.sharradius); @@ -4304,6 +4319,7 @@ void Locallab::setBatchMode(bool batchMode) noiselumf->showEditedCB(); noiselumc->showEditedCB(); noiselumdetail->showEditedCB(); + noisechrodetail->showEditedCB(); noisechroc->showEditedCB(); noiselumf->showEditedCB(); sharradius->showEditedCB(); diff --git a/rtgui/locallab.h b/rtgui/locallab.h index 3834a6e02..5a23cf5ce 100644 --- a/rtgui/locallab.h +++ b/rtgui/locallab.h @@ -116,6 +116,7 @@ private: Adjuster* const noiselumf; Adjuster* const noiselumc; Adjuster* const noiselumdetail; + Adjuster* const noisechrodetail; Adjuster* const noisechrof; Adjuster* const noisechroc; @@ -233,7 +234,7 @@ private: - int nextdatasp[87]; + int nextdatasp[88]; int nextlength; std::string nextstr; std::string nextstr2; diff --git a/rtgui/paramsedited.cc b/rtgui/paramsedited.cc index 384ab789e..6b1c30dc1 100644 --- a/rtgui/paramsedited.cc +++ b/rtgui/paramsedited.cc @@ -348,6 +348,7 @@ void ParamsEdited::set(bool v) locallab.noiselumf = v; locallab.noiselumc = v; locallab.noiselumdetail = v; + locallab.noisechrodetail = v; locallab.noisechrof = v; locallab.noisechroc = v; locallab.sharradius = v; @@ -1012,6 +1013,7 @@ void ParamsEdited::initFrom(const std::vector& locallab.noiselumf = locallab.noiselumf && p.locallab.noiselumf == other.locallab.noiselumf; locallab.noiselumc = locallab.noiselumc && p.locallab.noiselumc == other.locallab.noiselumc; locallab.noiselumdetail = locallab.noiselumdetail && p.locallab.noiselumdetail == other.locallab.noiselumdetail; + locallab.noisechrodetail = locallab.noisechrodetail && p.locallab.noisechrodetail == other.locallab.noisechrodetail; locallab.noisechrof = locallab.noisechrof && p.locallab.noisechrof == other.locallab.noisechrof; locallab.noisechroc = locallab.noisechroc && p.locallab.noisechroc == other.locallab.noisechroc; locallab.sharradius = locallab.sharradius && p.locallab.sharradius == other.locallab.sharradius; @@ -2622,6 +2624,10 @@ void ParamsEdited::combine(rtengine::procparams::ProcParams& toEdit, const rteng toEdit.locallab.noiselumdetail = mods.locallab.noiselumdetail; } + if (locallab.noisechrodetail) { + toEdit.locallab.noisechrodetail = mods.locallab.noisechrodetail; + } + if (locallab.noisechrof) { toEdit.locallab.noisechrof = mods.locallab.noisechrof; } diff --git a/rtgui/paramsedited.h b/rtgui/paramsedited.h index 15d45d470..be3c83712 100644 --- a/rtgui/paramsedited.h +++ b/rtgui/paramsedited.h @@ -467,6 +467,7 @@ public: bool noiselumf; bool noiselumc; bool noiselumdetail; + bool noisechrodetail; bool noisechrof; bool noisechroc; bool sharradius;