From d47389f2c169c5eea531fff058dc4ab288b44415 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Mon, 8 Apr 2019 23:59:57 +0200 Subject: [PATCH] locallab: cleanup and small speedup for blur --- rtengine/improccoordinator.cc | 2 +- rtengine/iplocallab.cc | 305 +++++++++++----------------------- 2 files changed, 97 insertions(+), 210 deletions(-) diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index 0afc4df01..9433a07c5 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -1050,7 +1050,7 @@ void ImProcCoordinator::updatePreviewImage(int todo, bool panningRelatedChange) } - ipf.softLight(nprevl, params->softlight); + ipf.softLight(nprevl, params->softlight); if (params->colorappearance.enabled) { //L histo and Chroma histo for ciecam diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index b6ff074cd..f61a3c31d 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -1133,7 +1133,8 @@ void ImProcFunctions::addGaNoise(LabImage *lab, LabImage *dst, const float mean, srand(1); const float variaFactor = SQR(variance) / sk; - const float randFactor = 1.f / RAND_MAX; + constexpr float randFactor1 = 1.f / RAND_MAX; + constexpr float randFactor2 = (2.f * rtengine::RT_PI_F) / RAND_MAX; #ifdef _OPENMP #pragma omp parallel #endif @@ -1175,8 +1176,8 @@ void ImProcFunctions::addGaNoise(LabImage *lab, LabImage *dst, const float mean, u2 = rand(); } - float u1f = u1 * randFactor; - float u2f = u2 * randFactor; + float u1f = u1 * randFactor1; + float u2f = u2 * randFactor2; float2 sincosval = xsincosf(2.f * rtengine::RT_PI_F * u2f); float factor = sqrtf(-2.f * xlogf(u1f)); @@ -1414,19 +1415,23 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 //local BLUR BENCHFUN - const float ach = (float)lp.trans / 100.f; - int GW = transformed->W; - int GH = transformed->H; - float refa = chromaref * cos(hueref); - float refb = chromaref * sin(hueref); + const float ach = lp.trans / 100.f; + const int GW = transformed->W; + const int GH = transformed->H; + const float refa = chromaref * cos(hueref) * 327.68f; + const float refb = chromaref * sin(hueref) * 327.68f; + const float refL = lumaref * 327.68f; + //balance deltaE float kL = lp.balance; float kab = 1.f; balancedeltaE(kL, kab); + kab /= SQR(327.68f); + kL /= SQR(327.68f); - LabImage *origblur = new LabImage(GW, GH); + std::unique_ptr origblur(new LabImage(GW, GH)); - float radius = 3.f / sk; + const float radius = 3.f / sk; #ifdef _OPENMP #pragma omp parallel #endif @@ -1434,13 +1439,16 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 gaussianBlur(original->L, origblur->L, GW, GH, radius); gaussianBlur(original->a, origblur->a, GW, GH, radius); gaussianBlur(original->b, origblur->b, GW, GH, radius); - } #ifdef _OPENMP #pragma omp parallel if (multiThread) #endif { + const int begy = int (lp.yc - lp.lyT); + const int begx = int (lp.xc - lp.lxL); + const float mindE = 2.f + MINSCOPE * lp.sensbn * lp.thr; + const float maxdE = 5.f + MAXSCOPE * lp.sensbn * (1 + 0.1f * lp.thr); #ifdef _OPENMP #pragma omp for schedule(dynamic,16) @@ -1452,24 +1460,10 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing if (isZone0) { // outside selection and outside transition zone => no effect, keep original values - for (int x = 0; x < transformed->W; x++) { - if (lp.blurmet == 0) { - transformed->L[y][x] = original->L[y][x]; - } - - if (lp.blurmet == 2) { - transformed->L[y][x] = tmp2->L[y][x]; - } - } - - if (!lp.actsp) { + if (lp.blurmet == 2) { for (int x = 0; x < transformed->W; x++) { - if (lp.blurmet == 0) { - transformed->a[y][x] = original->a[y][x]; - transformed->b[y][x] = original->b[y][x]; - } - - if (lp.blurmet == 2) { + transformed->L[y][x] = tmp2->L[y][x]; + if (!lp.actsp) { transformed->a[y][x] = tmp2->a[y][x]; transformed->b[y][x] = tmp2->b[y][x]; } @@ -1481,10 +1475,6 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 for (int x = 0, lox = cx + x; x < transformed->W; x++, lox++) { int zone = 0; - // int lox = cx + x; - int begx = int (lp.xc - lp.lxL); - int begy = int (lp.yc - lp.lyT); - float localFactor = 1.f; if (lp.shapmet == 0) { @@ -1495,21 +1485,9 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 if (zone == 0) { // outside selection and outside transition zone => no effect, keep original values - if (lp.blurmet == 0) { - transformed->L[y][x] = original->L[y][x]; - } - if (lp.blurmet == 2) { transformed->L[y][x] = tmp2->L[y][x]; - } - - if (!lp.actsp) { - if (lp.blurmet == 0) { - transformed->a[y][x] = original->a[y][x]; - transformed->b[y][x] = original->b[y][x]; - } - - if (lp.blurmet == 2) { + if (!lp.actsp) { transformed->a[y][x] = tmp2->a[y][x]; transformed->b[y][x] = tmp2->b[y][x]; } @@ -1518,47 +1496,29 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 continue; } - float rL = origblur->L[y][x] / 327.68f; - float dE = sqrt(kab * SQR(refa - origblur->a[y][x] / 327.68f) + kab * SQR(refb - origblur->b[y][x] / 327.68f) + kL * SQR(lumaref - rL)); - - float cli = (buflight[loy - begy][lox - begx]); - float clc = (bufchro[loy - begy][lox - begx]); - float reducdE = 0.f; - float mindE = 2.f + MINSCOPE * lp.sensbn * lp.thr; - float maxdE = 5.f + MAXSCOPE * lp.sensbn * (1 + 0.1f * lp.thr); - - float ar = 1.f / (mindE - maxdE); - - float br = - ar * maxdE; - - if (dE > maxdE) { - reducdE = 0.f; - } - - if (dE > mindE && dE <= maxdE) { - reducdE = ar * dE + br; - } - - if (dE <= mindE) { - reducdE = 1.f; - } - - reducdE = pow(reducdE, lp.iterat); + const float dE = sqrt(kab * (SQR(refa - origblur->a[y][x]) + SQR(refb - origblur->b[y][x])) + kL * SQR(refL - origblur->L[y][x])); + float reducdE; if (lp.sensbn > 99) { reducdE = 1.f; + } else if (dE > maxdE) { + reducdE = 0.f; + } else if (dE > mindE && dE <= maxdE) { + const float ar = 1.f / (mindE - maxdE); + const float br = - ar * maxdE; + reducdE = pow(ar * dE + br, lp.iterat); + } else /*if (dE <= mindE) */ { + reducdE = 1.f; } - - float realstrdE = reducdE * cli; - float realstrchdE = reducdE * clc; + const float realstrdE = reducdE * buflight[loy - begy][lox - begx]; + const float realstrchdE = localFactor * (1.f + (reducdE * bufchro[loy - begy][lox - begx]) / 100.f); switch (zone) { case 1: { // inside transition zone float difL, difa, difb; - float factorx = localFactor; if (call <= 3) { difL = tmp1->L[loy - begy][lox - begx] - original->L[y][x]; @@ -1572,31 +1532,22 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 } - difL *= factorx * (100.f + realstrdE) / 100.f; - -// difL *= kch * fach; + difL *= localFactor * (100.f + realstrdE) / 100.f; if (lp.blurmet == 0) { transformed->L[y][x] = CLIP(original->L[y][x] + difL); - } - - if (lp.blurmet == 2) { + } else if (lp.blurmet == 2) { transformed->L[y][x] = CLIP(tmp2->L[y][x] - difL); } if (!lp.actsp) { - difa *= factorx * (100.f + realstrchdE) / 100.f; - difb *= factorx * (100.f + realstrchdE) / 100.f; - -// difa *= kch * fach; -// difb *= kch * fach; + difa *= realstrchdE; + difb *= realstrchdE; if (lp.blurmet == 0) { transformed->a[y][x] = CLIPC(original->a[y][x] + difa); transformed->b[y][x] = CLIPC(original->b[y][x] + difb); - } - - if (lp.blurmet == 2) { + } else if (lp.blurmet == 2) { transformed->a[y][x] = CLIPC(tmp2->a[y][x] - difa); transformed->b[y][x] = CLIPC(tmp2->b[y][x] - difb); } @@ -1608,48 +1559,35 @@ void ImProcFunctions::BlurNoise_Local(int call, LabImage * tmp1, LabImage * tmp2 case 2: { // inside selection => full effect, no transition float difL, difa, difb; - if (call <= 3) { - difL = tmp1->L[loy - begy][lox - begx] - original->L[y][x]; - difa = tmp1->a[loy - begy][lox - begx] - original->a[y][x]; - difb = tmp1->b[loy - begy][lox - begx] - original->b[y][x]; - } else { - difL = tmp1->L[y][x] - original->L[y][x]; - difa = tmp1->a[y][x] - original->a[y][x]; - difb = tmp1->b[y][x] - original->b[y][x]; - - } + difL = tmp1->L[loy - begy][lox - begx] - original->L[y][x]; + difa = tmp1->a[loy - begy][lox - begx] - original->a[y][x]; + difb = tmp1->b[loy - begy][lox - begx] - original->b[y][x]; difL *= (100.f + realstrdE) / 100.f; if (lp.blurmet == 0) { transformed->L[y][x] = CLIP(original->L[y][x] + difL); - } - - if (lp.blurmet == 2) { + } else if (lp.blurmet == 2) { transformed->L[y][x] = CLIP(tmp2->L[y][x] - difL); } if (!lp.actsp) { - difa *= (100.f + realstrchdE) / 100.f; - difb *= (100.f + realstrchdE) / 100.f; + difa *= realstrchdE; + difb *= realstrchdE; if (lp.blurmet == 0) { transformed->a[y][x] = CLIPC(original->a[y][x] + difa); ; transformed->b[y][x] = CLIPC(original->b[y][x] + difb); - } - - if (lp.blurmet == 2) { + } else if (lp.blurmet == 2) { transformed->a[y][x] = CLIPC(tmp2->a[y][x] - difa); transformed->b[y][x] = CLIPC(tmp2->b[y][x] - difb); } - } } } } } } - delete origblur; } void ImProcFunctions::InverseReti_Local(const struct local_params & lp, const float hueref, const float chromaref, const float lumaref, LabImage * original, LabImage * transformed, const LabImage * const tmp1, int cx, int cy, int chro, int sk) @@ -4141,11 +4079,10 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o //Blur and noise - if (((radius >= 1.5 * GAUSS_SKIP && lp.rad > 1.) || lp.stren > 0.1) && lp.blurena) { // radius < GAUSS_SKIP means no gauss, just copy of original image - LabImage *tmp1 = nullptr; - LabImage *tmp2 = nullptr; - LabImage *bufgb = nullptr; - float *origBuffer = nullptr; + if (((radius >= 1.5 * GAUSS_SKIP && lp.rad > 1.) || lp.stren > 0.1) && lp.blurena) { // radius < GAUSS_SKIP means no gauss, just copy of original image + std::unique_ptr tmp1; + std::unique_ptr tmp2; + std::unique_ptr bufgb; // LabImage *deltasobelL = nullptr; int GW = transformed->W; int GH = transformed->H; @@ -4154,30 +4091,8 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o JaggedArray buflight(bfw, bfh); JaggedArray bufchro(bfw, bfh); - float *orig[bfh] ALIGNED16; - - if (call <= 3 && lp.blurmet != 1) { - bufgb = new LabImage(bfw, bfh); - - origBuffer = new float[bfh * bfw]; - - for (int i = 0; i < bfh; i++) { - orig[i] = &origBuffer[i * bfw]; - } - -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int ir = 0; ir < bfh; ir++) //fill with 0 - for (int jr = 0; jr < bfw; jr++) { - bufgb->L[ir][jr] = 0.f; - bufgb->a[ir][jr] = 0.f; - bufgb->b[ir][jr] = 0.f; - buflight[ir][jr] = 0.f; - bufchro[ir][jr] = 0.f; - - } + if (call <= 3 && lp.blurmet != 1) { + bufgb.reset(new LabImage(bfw, bfh, true)); int begy = lp.yc - lp.lyT; int begx = lp.xc - lp.lxL; @@ -4188,24 +4103,23 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o #pragma omp parallel for schedule(dynamic,16) #endif - for (int y = 0; y < transformed->H ; y++) //{ - for (int x = 0; x < transformed->W; x++) { - int lox = cx + x; - int loy = cy + y; - - if (lox >= begx && lox < xEn && loy >= begy && loy < yEn) { - bufgb->L[loy - begy][lox - begx] = original->L[y][x]; - bufgb->a[loy - begy][lox - begx] = original->a[y][x]; - bufgb->b[loy - begy][lox - begx] = original->b[y][x]; + for (int y = 0; y < transformed->H ; y++) { + const int loy = cy + y; + if (loy >= begy && loy < yEn) { + for (int x = 0; x < transformed->W; x++) { + const int lox = cx + x; + if (lox >= begx && lox < xEn) { + bufgb->L[loy - begy][lox - begx] = original->L[y][x]; + bufgb->a[loy - begy][lox - begx] = original->a[y][x]; + bufgb->b[loy - begy][lox - begx] = original->b[y][x]; + } } } - - tmp1 = new LabImage(bfw, bfh); - - + } + tmp1.reset(new LabImage(bfw, bfh)); if (lp.blurmet == 2) { - tmp2 = new LabImage(transformed->W, transformed->H); + tmp2.reset(new LabImage(transformed->W, transformed->H)); #ifdef _OPENMP #pragma omp parallel #endif @@ -4214,7 +4128,6 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o gaussianBlur(original->a, tmp2->a, GW, GH, radius); gaussianBlur(original->b, tmp2->b, GW, GH, radius); } - } @@ -4231,7 +4144,7 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o } else { - tmp1 = new LabImage(transformed->W, transformed->H);; + tmp1.reset(new LabImage(transformed->W, transformed->H)); #ifdef _OPENMP #pragma omp parallel @@ -4246,95 +4159,69 @@ void ImProcFunctions::Lab_Local(int call, int sp, float** shbuffer, LabImage * o if (lp.stren > 0.1f) { if (lp.blurmet <= 1) { - float mean = 0.f;//0 best result float variance = lp.stren ; //(double) SQR(lp.stren)/sk; - addGaNoise(tmp1, tmp1, mean, variance, sk) ; + addGaNoise(tmp1.get(), tmp1.get(), mean, variance, sk) ; } - } if (lp.blurmet != 1) { //blur and noise (center) - float minL = tmp1->L[0][0] - bufgb->L[0][0]; - float maxL = minL; + float minL = tmp1->L[0][0] - bufgb->L[0][0]; + float maxL = minL; #ifdef _OPENMP - #pragma omp parallel for reduction(max:maxL) reduction(min:minL) schedule(dynamic,16) + #pragma omp parallel for reduction(max:maxL) reduction(min:minL) schedule(dynamic,16) #endif - for (int ir = 0; ir < bfh; ir++) { - for (int jr = 0; jr < bfw; jr++) { - buflight[ir][jr] = tmp1->L[ir][jr] - bufgb->L[ir][jr]; - minL = rtengine::min(minL, buflight[ir][jr]); - maxL = rtengine::max(maxL, buflight[ir][jr]); - } + for (int ir = 0; ir < bfh; ir++) { + for (int jr = 0; jr < bfw; jr++) { + buflight[ir][jr] = tmp1->L[ir][jr] - bufgb->L[ir][jr]; + minL = rtengine::min(minL, buflight[ir][jr]); + maxL = rtengine::max(maxL, buflight[ir][jr]); } + } - float coef = 0.01f * (max(fabs(minL), fabs(maxL))); + const float coef = 0.01f * (max(fabs(minL), fabs(maxL))); #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < bfh; ir++) + for (int ir = 0; ir < bfh; ir++) { for (int jr = 0; jr < bfw; jr++) { buflight[ir][jr] /= coef; } + } + float minC = sqrt((SQR(tmp1->a[0][0]) + SQR(tmp1->b[0][0]))) - sqrt(SQR(bufgb->a[0][0]) + SQR(bufgb->b[0][0])); + float maxC = minC; #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) + #pragma omp parallel for reduction(max:maxC) reduction(min:minC) schedule(dynamic,16) #endif - for (int ir = 0; ir < bfh; ir += 1) - for (int jr = 0; jr < bfw; jr += 1) { - orig[ir][jr] = sqrt(SQR(bufgb->a[ir][jr]) + SQR(bufgb->b[ir][jr])); + for (int ir = 0; ir < bfh; ir++) { + for (int jr = 0; jr < bfw; jr++) { + bufchro[ir][jr] = sqrt((SQR(tmp1->a[ir][jr]) + SQR(tmp1->b[ir][jr]))) - sqrt(SQR(bufgb->a[ir][jr]) + SQR(bufgb->b[ir][jr])); + minC = rtengine::min(minC, bufchro[ir][jr]); + maxC = rtengine::max(maxC, bufchro[ir][jr]); } + } - float minC = sqrt((SQR(tmp1->a[0][0]) + SQR(tmp1->b[0][0]))) - orig[0][0]; - float maxC = minC; -#ifdef _OPENMP - #pragma omp parallel for reduction(max:maxC) reduction(min:minC) schedule(dynamic,16) -#endif - - for (int ir = 0; ir < bfh; ir++) { - for (int jr = 0; jr < bfw; jr++) { - bufchro[ir][jr] = sqrt((SQR(tmp1->a[ir][jr]) + SQR(tmp1->b[ir][jr]))) - orig[ir][jr]; - minC = rtengine::min(minC, bufchro[ir][jr]); - maxC = rtengine::max(maxC, bufchro[ir][jr]); - } - } - - float coefC = 0.01f * (max(fabs(minC), fabs(maxC))); + const float coefC = 0.01f * (max(fabs(minC), fabs(maxC))); #ifdef _OPENMP #pragma omp parallel for #endif - for (int ir = 0; ir < bfh; ir++) + for (int ir = 0; ir < bfh; ir++) { for (int jr = 0; jr < bfw; jr++) { - bufchro[ir][jr] /= coefC; } - - BlurNoise_Local(call, tmp1, tmp2, buflight, bufchro, hueref, chromaref, lumaref, lp, original, transformed, cx, cy, sk); + bufchro[ir][jr] /= coefC; + } + } + BlurNoise_Local(call, tmp1.get(), tmp2.get(), buflight, bufchro, hueref, chromaref, lumaref, lp, original, transformed, cx, cy, sk); } else { - InverseBlurNoise_Local(lp, original, transformed, tmp1, cx, cy); - + InverseBlurNoise_Local(lp, original, transformed, tmp1.get(), cx, cy); } - - if (call <= 3 && lp.blurmet != 1) { - - delete bufgb; - - delete [] origBuffer; - - - } - - delete tmp1; - - if (lp.blurmet == 2) { - delete tmp2; - } - }