From cfbcd6cd5b19027600ccd0225c7ece9d98257fd4 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Fri, 23 Feb 2018 13:30:52 +0100 Subject: [PATCH] Speedup for PF_correct_RTcam() --- rtengine/PF_correct_RT.cc | 359 +++++++++++++++++--------------------- rtengine/improcfun.h | 8 +- rtengine/procparams.h | 2 +- 3 files changed, 165 insertions(+), 204 deletions(-) diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index 2b5a7e713..7c8559a05 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -23,7 +23,6 @@ // along with this program. If not, see . // //////////////////////////////////////////////////////////////// -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #include "gauss.h" #include "improcfun.h" @@ -39,18 +38,16 @@ namespace rtengine { -void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) +void ImProcFunctions::PF_correct_RT(LabImage * lab, double radius, int thresh) { BENCHFUN - const int halfwin = std::ceil(2 * radius) + 1; - std::unique_ptr chCurve; if (params->defringe.huecurve.size() && FlatCurveType(params->defringe.huecurve.at(0)) > FCT_Linear) { chCurve.reset(new FlatCurve(params->defringe.huecurve)); } // local variables - const int width = src->W, height = src->H; + const int width = lab->W, height = lab->H; //temporary array to store chromaticity const std::unique_ptr fringe(new float[width * height]); @@ -58,23 +55,17 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) JaggedArray tmpa(width, height); JaggedArray tmpb(width, height); -#ifdef _OPENMP - #pragma omp parallel -#endif - { - gaussianBlur(src->a, tmpa, width, height, radius); - gaussianBlur(src->b, tmpb, width, height, radius); - } - double chromave = 0.0; // use double precision for large summations #ifdef _OPENMP #pragma omp parallel #endif { - float chromaChfactor = 1.f; + gaussianBlur(lab->a, tmpa, width, height, radius); + gaussianBlur(lab->b, tmpb, width, height, radius); + #ifdef _OPENMP - #pragma omp for reduction(+:chromave) + #pragma omp for reduction(+:chromave) schedule(dynamic,16) #endif for (int i = 0; i < height; i++) { @@ -85,24 +76,25 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) int k = 0; for (; k < width - 3; k += 4) { - STVFU(fringe[i * width + k], xatan2f(LVFU(src->b[i][k]), LVFU(src->a[i][k]))); + STVFU(fringe[i * width + k], xatan2f(LVFU(lab->b[i][k]), LVFU(lab->a[i][k]))); } for (; k < width; k++) { - fringe[i * width + k] = xatan2f(src->b[i][k], src->a[i][k]); + fringe[i * width + k] = xatan2f(lab->b[i][k], lab->a[i][k]); } } #endif for (int j = 0; j < width; j++) { + float chromaChfactor = 1.f; if (chCurve) { #ifdef __SSE2__ // use the precalculated atan values const float HH = fringe[i * width + j]; #else // no precalculated values without SSE => calculate - const float HH = xatan2f(src->b[i][j], src->a[i][j]); + const float HH = xatan2f(lab->b[i][j], lab->a[i][j]); #endif float chparam = chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f; // get C=f(H) @@ -113,7 +105,7 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) chromaChfactor = SQR(1.f + chparam); } - const float chroma = chromaChfactor * (SQR(src->a[i][j] - tmpa[i][j]) + SQR(src->b[i][j] - tmpb[i][j])); // modulate chroma function hue + const float chroma = chromaChfactor * (SQR(lab->a[i][j] - tmpa[i][j]) + SQR(lab->b[i][j] - tmpb[i][j])); // modulate chroma function hue chromave += chroma; fringe[i * width + j] = chroma; } @@ -133,9 +125,10 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) } const float threshfactor = 1.f / (SQR(thresh / 33.f) * chromave * 5.0f + chromave); + const int halfwin = std::ceil(2 * radius) + 1; // Issue 1674: -// often, CA isn't evenly distributed, e.g. a lot in contrasty regions and none in the sky. +// often, CA is not evenly distributed, e.g. a lot in contrasty regions and none in the sky. // so it's better to schedule dynamic and let every thread only process 16 rows, to avoid running big threads out of work // Measured it and in fact gives better performance than without schedule(dynamic,16). Of course, there could be a better // choice for the chunk_size than 16 @@ -156,13 +149,13 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) for (int j1 = 0; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } @@ -176,13 +169,13 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } @@ -196,23 +189,22 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) for (int j1 = j - halfwin + 1; j1 < width; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } }//end of ab channel averaging } } -void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh) +void ImProcFunctions::PF_correct_RTcam(CieImage * ncie, double radius, int thresh) { BENCHFUN - const int halfwin = std::ceil(2 * radius) + 1; std::unique_ptr chCurve; @@ -221,13 +213,13 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh } // local variables - const int width = src->W, height = src->H; + const int width = ncie->W, height = ncie->H; //temporary array to store chromaticity const std::unique_ptr fringe(new float[width * height]); - float** const sraa = src->h_p; // we use the src->h_p buffer to avoid memory allocation/deallocation and reduce memory pressure - float** const srbb = src->C_p; // we use the src->C_p buffer to avoid memory allocation/deallocation and reduce memory pressure + float** const sraa = ncie->h_p; // we use the ncie->h_p buffer to avoid memory allocation/deallocation and reduce memory pressure + float** const srbb = ncie->C_p; // we use the ncie->C_p buffer to avoid memory allocation/deallocation and reduce memory pressure JaggedArray tmaa(width, height); JaggedArray tmbb(width, height); @@ -247,40 +239,37 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #ifdef __SSE2__ for (; j < width - 3; j += 4) { - const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); - STVFU(sraa[i][j], LVFU(src->C_p[i][j]) * sincosvalv.y); - STVFU(srbb[i][j], LVFU(src->C_p[i][j]) * sincosvalv.x); + const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(ncie->h_p[i][j])); + STVFU(sraa[i][j], LVFU(ncie->C_p[i][j]) * sincosvalv.y); + STVFU(srbb[i][j], LVFU(ncie->C_p[i][j]) * sincosvalv.x); } #endif for (; j < width; j++) { - const float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); - sraa[i][j] = src->C_p[i][j] * sincosval.y; - srbb[i][j] = src->C_p[i][j] * sincosval.x; + const float2 sincosval = xsincosf(RT_PI_F_180 * ncie->h_p[i][j]); + sraa[i][j] = ncie->C_p[i][j] * sincosval.y; + srbb[i][j] = ncie->C_p[i][j] * sincosval.x; } } } + double chromave = 0.0; // use double precision for large summations + #ifdef _OPENMP #pragma omp parallel #endif { gaussianBlur(sraa, tmaa, width, height, radius); gaussianBlur(srbb, tmbb, width, height, radius); - } + float chromaChfactor = 1.f; +#ifdef _OPENMP + #pragma omp for reduction(+:chromave) schedule(dynamic,16) +#endif + + for (int i = 0; i < height; i++) { #ifdef __SSE2__ - - if (chCurve) { - // vectorized precalculation of the atan2 values -#ifdef _OPENMP - #pragma omp parallel -#endif - { -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < height; i++) { + // vectorized per row precalculation of the atan2 values + if (chCurve) { int j = 0; for (; j < width - 3; j += 4) { STVFU(fringe[i * width + j], xatan2f(LVFU(srbb[i][j]), LVFU(sraa[i][j]))); @@ -290,23 +279,8 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh fringe[i * width + j] = xatan2f(srbb[i][j], sraa[i][j]); } } - } - } - #endif - double chromave = 0.0; // use double precision for large summations - -#ifdef _OPENMP - #pragma omp parallel -#endif - { - float chromaChfactor = 1.f; -#ifdef _OPENMP - #pragma omp for reduction(+:chromave) -#endif - - for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { if (chCurve) { #ifdef __SSE2__ @@ -345,6 +319,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh } const float threshfactor = 1.f / (SQR(thresh / 33.f) * chromave * 5.0f + chromave); + const int halfwin = std::ceil(2 * radius) + 1; // Issue 1674: // often, CA isn't evenly distributed, e.g. a lot in contrasty regions and none in the sky. @@ -352,6 +327,8 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh // Measured it and in fact gives better performance than without schedule(dynamic,16). Of course, there could be a better // choice for the chunk_size than 16 // Issue 1972: Split this loop in three parts to avoid most of the min and max-operations + + #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif @@ -359,13 +336,9 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh for (int i = 0; i < height; i++) { int j = 0; for (; j < halfwin - 1; j++) { - tmaa[i][j] = sraa[i][j]; - tmbb[i][j] = srbb[i][j]; - if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - - for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = 0; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; @@ -373,22 +346,19 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh btot += wt * srbb[i1][j1]; norm += wt; } - - if (norm > 0.f) { - tmaa[i][j] = atot / norm; - tmbb[i][j] = btot / norm; } + tmaa[i][j] = atot / norm; + tmbb[i][j] = btot / norm; + } else { + tmaa[i][j] = sraa[i][j]; + tmbb[i][j] = srbb[i][j]; } } for (; j < width - halfwin + 1; j++) { - tmaa[i][j] = sraa[i][j]; - tmbb[i][j] = srbb[i][j]; - if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - - for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; @@ -396,22 +366,19 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh btot += wt * srbb[i1][j1]; norm += wt; } - - if (norm > 0.f) { - tmaa[i][j] = atot / norm; - tmbb[i][j] = btot / norm; } - } + tmaa[i][j] = atot / norm; + tmbb[i][j] = btot / norm; + } else { + tmaa[i][j] = sraa[i][j]; + tmbb[i][j] = srbb[i][j]; + } } for (; j < width; j++) { - tmaa[i][j] = sraa[i][j]; - tmbb[i][j] = srbb[i][j]; - if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - - for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < width; j1++) { //neighbourhood average of pixels weighted by chrominance const float wt = fringe[i1 * width + j1]; @@ -419,48 +386,42 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh btot += wt * srbb[i1][j1]; norm += wt; } - - if (norm > 0.f) { - tmaa[i][j] = atot / norm; - tmbb[i][j] = btot / norm; } + tmaa[i][j] = atot / norm; + tmbb[i][j] = btot / norm; + } else { + tmaa[i][j] = sraa[i][j]; + tmbb[i][j] = srbb[i][j]; } } - } //end of ab channel averaging - -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int i = 0; i < height; i++) { - int j = 0; + j = 0; #ifdef __SSE2__ for (; j < width - 3; j += 4) { const vfloat interav = LVFU(tmaa[i][j]); const vfloat interbv = LVFU(tmbb[i][j]); - STVFU(src->h_p[i][j], xatan2f(interbv, interav) / F2V(RT_PI_F_180)); - STVFU(src->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav))); + STVFU(ncie->h_p[i][j], xatan2f(interbv, interav) / F2V(RT_PI_F_180)); + STVFU(ncie->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav))); } #endif for (; j < width; j++) { const float intera = tmaa[i][j]; const float interb = tmbb[i][j]; - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = sqrt(SQR(interb) + SQR(intera)); + ncie->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + ncie->C_p[i][j] = sqrt(SQR(interb) + SQR(intera)); } - } + } //end of ab channel averaging } } -void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad) +void ImProcFunctions::Badpixelscam(CieImage * ncie, double radius, int thresh, int mode, float chrom, bool hotbad) { BENCHFUN if (mode == 2 && radius < 0.25) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here return; } - const int width = src->W, height = src->H; + const int width = ncie->W, height = ncie->H; constexpr float eps = 1.f; @@ -474,7 +435,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #endif { //luma sh_p - gaussianBlur(src->sh_p, tmL, width, height, radius / 2.0);//low value to avoid artifacts + gaussianBlur(ncie->sh_p, tmL, width, height, radius / 2.0);//low value to avoid artifacts } //luma badpixels @@ -496,12 +457,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (int i = 0; i < height; i++) { int j = 0; for (; j < 2; j++) { - const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(ncie->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { - shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(ncie->sh_p[i1][j1] - tmL[i1][j1]); } } @@ -511,12 +472,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #ifdef __SSE2__ for (; j < width - 5; j += 4) { - const vfloat shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j])); + const vfloat shfabsv = vabsf(LVFU(ncie->sh_p[i][j]) - LVFU(tmL[i][j])); vfloat shmedv = ZEROV; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { - shmedv += vabsf(LVFU(src->sh_p[i1][j1]) - LVFU(tmL[i1][j1])); + shmedv += vabsf(LVFU(ncie->sh_p[i1][j1]) - LVFU(tmL[i1][j1])); } } @@ -524,12 +485,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } #endif for (; j < width - 2; j++) { - const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(ncie->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { - shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(ncie->sh_p[i1][j1] - tmL[i1][j1]); } } @@ -537,12 +498,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } for (; j < width; j++) { - const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(ncie->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { - shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(ncie->sh_p[i1][j1] - tmL[i1][j1]); } } @@ -564,18 +525,18 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->sh_p[i1][j1]; + sum += ncie->sh_p[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); - shsum += dirsh * src->sh_p[i1][j1]; + const float dirsh = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); + shsum += dirsh * ncie->sh_p[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->sh_p[i][j] = shsum / norm; + ncie->sh_p[i][j] = shsum / norm; } else if (tot > 0.f) { - src->sh_p[i][j] = sum / tot; + ncie->sh_p[i][j] = sum / tot; } } } @@ -587,18 +548,18 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->sh_p[i1][j1]; + sum += ncie->sh_p[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); - shsum += dirsh * src->sh_p[i1][j1]; + const float dirsh = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); + shsum += dirsh * ncie->sh_p[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->sh_p[i][j] = shsum / norm; + ncie->sh_p[i][j] = shsum / norm; } else if (tot > 0.f) { - src->sh_p[i][j] = sum / tot; + ncie->sh_p[i][j] = sum / tot; } } } @@ -610,18 +571,18 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->sh_p[i1][j1]; + sum += ncie->sh_p[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); - shsum += dirsh * src->sh_p[i1][j1]; + const float dirsh = 1.f / (SQR(ncie->sh_p[i1][j1] - ncie->sh_p[i][j]) + eps); + shsum += dirsh * ncie->sh_p[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->sh_p[i][j] = shsum / norm; + ncie->sh_p[i][j] = shsum / norm; } else if (tot > 0.f) { - src->sh_p[i][j] = sum / tot; + ncie->sh_p[i][j] = sum / tot; } } } @@ -651,15 +612,15 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #ifdef __SSE2__ for (; j < width - 3; j += 4) { - const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); - STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y); - STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x); + const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(ncie->h_p[i][j])); + STVFU(sraa[i][j], LVFU(ncie->C_p[i][j])*sincosvalv.y); + STVFU(srbb[i][j], LVFU(ncie->C_p[i][j])*sincosvalv.x); } #endif for (; j < width; j++) { - const float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); - sraa[i][j] = src->C_p[i][j] * sincosval.y; - srbb[i][j] = src->C_p[i][j] * sincosval.x; + const float2 sincosval = xsincosf(RT_PI_F_180 * ncie->h_p[i][j]); + sraa[i][j] = ncie->C_p[i][j] * sincosval.y; + srbb[i][j] = ncie->C_p[i][j] * sincosval.x; } } } @@ -786,8 +747,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in const float CC = sqrt(SQR(interb) + SQR(intera)); if (CC < chrom) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + ncie->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + ncie->C_p[i][j] = CC; } } } @@ -819,8 +780,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in selMask = vandm(selMask, vmaskf_lt(CCv, chromv)); if (_mm_movemask_ps((vfloat)selMask)) { - STVFU(src->h_p[i][j], vself(selMask, xatan2f(interbv, interav) / piDiv180v, LVFU(src->h_p[i][j]))); - STVFU(src->C_p[i][j], vself(selMask, CCv, LVFU(src->C_p[i][j]))); + STVFU(ncie->h_p[i][j], vself(selMask, xatan2f(interbv, interav) / piDiv180v, LVFU(ncie->h_p[i][j]))); + STVFU(ncie->C_p[i][j], vself(selMask, CCv, LVFU(ncie->C_p[i][j]))); } } } @@ -845,8 +806,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in const float CC = sqrt(SQR(interb) + SQR(intera)); if (CC < chrom) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + ncie->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + ncie->C_p[i][j] = CC; } } } @@ -871,8 +832,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in const float CC = sqrt(SQR(interb) + SQR(intera)); if (CC < chrom) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + ncie->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + ncie->C_p[i][j] = CC; } } } @@ -882,7 +843,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } } -void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, float chrom) +void ImProcFunctions::BadpixelsLab(LabImage * lab, double radius, int thresh, float chrom) { BENCHFUN @@ -892,7 +853,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl const int halfwin = std::ceil(2 * radius) + 1; - const int width = src->W, height = src->H; + const int width = lab->W, height = lab->H; constexpr float eps = 1.f; @@ -907,7 +868,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #endif { // blur L channel - gaussianBlur(src->L, tmL, width, height, radius / 2.0);//low value to avoid artifacts + gaussianBlur(lab->L, tmL, width, height, radius / 2.0);//low value to avoid artifacts } //luma badpixels @@ -929,12 +890,12 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i = 0; i < height; i++) { int j = 0; for (; j < 2; j++) { - const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(lab->L[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { - shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(lab->L[i1][j1] - tmL[i1][j1]); } } badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); @@ -943,36 +904,36 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #ifdef __SSE2__ for (; j < width - 5; j += 4) { - const vfloat shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j])); + const vfloat shfabsv = vabsf(LVFU(lab->L[i][j]) - LVFU(tmL[i][j])); vfloat shmedv = ZEROV; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { - shmedv += vabsf(LVFU(src->L[i1][j1]) - LVFU(tmL[i1][j1])); + shmedv += vabsf(LVFU(lab->L[i1][j1]) - LVFU(tmL[i1][j1])); } } STVFU(badpix[i * width + j], vselfzero(vmaskf_gt(shfabsv, (shmedv - shfabsv) * shthrv), onev)); } #endif for (; j < width - 2; j++) { - const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(lab->L[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { - shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(lab->L[i1][j1] - tmL[i1][j1]); } } badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } for (; j < width; j++) { - const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(lab->L[i][j] - tmL[i][j]); float shmed = 0.f; for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { - shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(lab->L[i1][j1] - tmL[i1][j1]); } } badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); @@ -993,18 +954,18 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->L[i1][j1]; + sum += lab->L[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); - shsum += dirsh * src->L[i1][j1]; + const float dirsh = 1.f / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); + shsum += dirsh * lab->L[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->L[i][j] = shsum / norm; + lab->L[i][j] = shsum / norm; } else if (tot > 0.f) { - src->L[i][j] = sum / tot; + lab->L[i][j] = sum / tot; } } } @@ -1016,18 +977,18 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->L[i1][j1]; + sum += lab->L[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); - shsum += dirsh * src->L[i1][j1]; + const float dirsh = 1.f / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); + shsum += dirsh * lab->L[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->L[i][j] = shsum / norm; + lab->L[i][j] = shsum / norm; } else if (tot > 0.f) { - src->L[i][j] = sum / tot; + lab->L[i][j] = sum / tot; } } } @@ -1039,18 +1000,18 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { if (!badpix[i1 * width + j1]) { - sum += src->L[i1][j1]; + sum += lab->L[i1][j1]; tot += 1.f; - const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); - shsum += dirsh * src->L[i1][j1]; + const float dirsh = 1.f / (SQR(lab->L[i1][j1] - lab->L[i][j]) + eps); + shsum += dirsh * lab->L[i1][j1]; norm += dirsh; } } } if (norm > 0.f) { - src->L[i][j] = shsum / norm; + lab->L[i][j] = shsum / norm; } else if (tot > 0.f) { - src->L[i][j] = sum / tot; + lab->L[i][j] = sum / tot; } } } @@ -1067,8 +1028,8 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #endif { // blur chroma a and b - gaussianBlur(src->a, tmaa, width, height, radius); - gaussianBlur(src->b, tmbb, width, height, radius); + gaussianBlur(lab->a, tmaa, width, height, radius); + gaussianBlur(lab->b, tmbb, width, height, radius); } // begin chroma badpixels @@ -1080,7 +1041,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i = 0; i < height; i++) { for (int j = 0; j < width; j++) { - const float chroma = SQR(src->a[i][j] - tmaa[i][j]) + SQR(src->b[i][j] - tmbb[i][j]); + const float chroma = SQR(lab->a[i][j] - tmaa[i][j]) + SQR(lab->b[i][j] - tmbb[i][j]); chrommed += chroma; badpix[i * width + j] = chroma; } @@ -1134,14 +1095,14 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = 0; j1 < j + halfwin; j1++) { const float wt = badpix[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } } if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } } @@ -1157,17 +1118,17 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { const vfloat wtv = LVFU(badpix[i1 * width + j1]); - atotv += wtv * LVFU(src->a[i1][j1]); - btotv += wtv * LVFU(src->b[i1][j1]); + atotv += wtv * LVFU(lab->a[i1][j1]); + btotv += wtv * LVFU(lab->b[i1][j1]); normv += wtv; } } selMask = vandm(selMask, vmaskf_lt(SQRV(atotv) + SQR(btotv), chromv * SQRV(normv))); if (_mm_movemask_ps(reinterpret_cast(selMask))) { - const vfloat aOrig = LVFU(src->a[i][j]); - const vfloat bOrig = LVFU(src->b[i][j]); - STVFU(src->a[i][j], vself(selMask, atotv / normv, aOrig)); - STVFU(src->b[i][j], vself(selMask, btotv / normv, bOrig)); + const vfloat aOrig = LVFU(lab->a[i][j]); + const vfloat bOrig = LVFU(lab->b[i][j]); + STVFU(lab->a[i][j], vself(selMask, atotv / normv, aOrig)); + STVFU(lab->b[i][j], vself(selMask, btotv / normv, bOrig)); } } } @@ -1180,14 +1141,14 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { const float wt = badpix[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } } if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } } @@ -1200,14 +1161,14 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < width; j1++) { const float wt = badpix[i1 * width + j1]; - atot += wt * src->a[i1][j1]; - btot += wt * src->b[i1][j1]; + atot += wt * lab->a[i1][j1]; + btot += wt * lab->b[i1][j1]; norm += wt; } } if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { - src->a[i][j] = atot / norm; - src->b[i][j] = btot / norm; + lab->a[i][j] = atot / norm; + lab->b[i][j] = btot / norm; } } } diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 295c5c203..115270090 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -339,10 +339,10 @@ public: void badpixcam (CieImage* ncie, double rad, int thr, int mode, float chrom, bool hotbad); void badpixlab (LabImage* lab, double rad, int thr, float chrom); - void PF_correct_RT (LabImage * src, double radius, int thresh); - void PF_correct_RTcam (CieImage * src, double radius, int thresh); - void Badpixelscam (CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad); - void BadpixelsLab (LabImage * src, double radius, int thresh, float chrom); + void PF_correct_RT (LabImage * lab, double radius, int thresh); + void PF_correct_RTcam (CieImage * ncie, double radius, int thresh); + void Badpixelscam (CieImage * ncie, double radius, int thresh, int mode, float chrom, bool hotbad); + void BadpixelsLab (LabImage * lab, double radius, int thresh, float chrom); void ToneMapFattal02(Imagefloat *rgb); void localContrast(LabImage *lab); diff --git a/rtengine/procparams.h b/rtengine/procparams.h index ee4c1756f..8e0515e7f 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -649,7 +649,7 @@ struct ColorAppearanceParams { struct DefringeParams { bool enabled; double radius; - float threshold; + int threshold; std::vector huecurve; DefringeParams();