diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index c35c9ae8d..e306bc202 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -485,7 +485,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh delete [] fringe; } -void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad) +void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad) { BENCHFUN const int halfwin = ceil(2 * radius) + 1; @@ -526,7 +526,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in int j = 0; for (; j < 2; j++) { float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) for (int j1 = 0; j1 <= j + 2; j1++ ) { @@ -552,7 +552,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #endif for (; j < width - 2; j++) { float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) for (int j1 = j - 2; j1 <= j + 2; j1++ ) { @@ -564,7 +564,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (; j < width; j++) { float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) for (int j1 = j - 2; j1 < width; j1++ ) { @@ -588,9 +588,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in continue; } - float norm = 0.0f; - float shsum = 0.0f; - float sum = 0.0f; + float norm = 0.f; + float shsum = 0.f; + float sum = 0.f; int tot = 0; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) @@ -622,9 +622,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in continue; } - float norm = 0.0f; - float shsum = 0.0f; - float sum = 0.0f; + float norm = 0.f; + float shsum = 0.f; + float sum = 0.f; int tot = 0; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) @@ -656,9 +656,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in continue; } - float norm = 0.0f; - float shsum = 0.0f; - float sum = 0.0f; + float norm = 0.f; + float shsum = 0.f; + float sum = 0.f; int tot = 0; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) @@ -811,138 +811,175 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } // begin chroma badpixels - double chrommed = 0.0; // use double precision for large summations + if(hotbad) { + double chrommed = 0.0; // use double precision for large summations #ifdef _OPENMP - #pragma omp parallel for reduction(+:chrommed) + #pragma omp parallel for reduction(+:chrommed) #endif - for(int i = 0; i < height; i++ ) { - for(int j = 0; j < width; j++) { - float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]); - chrommed += chroma; - badpix[i * width + j] = chroma; + for(int i = 0; i < height; i++ ) { + for(int j = 0; j < width; j++) { + float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]); + chrommed += chroma; + badpix[i * width + j] = chroma; + } } - } - chrommed /= (height * width); + chrommed /= (height * width); - if(chrommed > 0.0) { - // now chrommed is calculated, so we postprocess badpix to reduce the number of divisions in future + if(chrommed > 0.0) { + // now as chrommed is calculated, we postprocess badpix to reduce the number of divisions in future #ifdef _OPENMP - #pragma omp parallel + #pragma omp parallel #endif - { + { #ifdef __SSE2__ - vfloat chrommedv = F2V(chrommed); - vfloat onev = F2V(1.f); + vfloat chrommedv = F2V(chrommed); + vfloat onev = F2V(1.f); #endif #ifdef _OPENMP #pragma omp for #endif - for(int i = 0; i < height; i++) { - int j = 0; + for(int i = 0; i < height; i++) { + int j = 0; #ifdef __SSE2__ - for(; j < width - 3; j += 4) { - STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv)); - } + for(; j < width - 3; j += 4) { + STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv)); + } #endif - for(; j < width; j++) { - badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed); + for(; j < width; j++) { + badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed); + } } } - } - const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed); + const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed); #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic,16) + #pragma omp parallel for schedule(dynamic,16) #endif - for(int i = 0; i < height; i++ ) { - int j = 0; - for(; j < halfwin; j++) { + for(int i = 0; i < height; i++ ) { + int j = 0; + for(; j < halfwin; j++) { - if (badpix[i * width + j] < threshfactor) { - float atot = 0.f; - float btot = 0.f; - float norm = 0.f; - float wt; + if (badpix[i * width + j] < threshfactor) { + float atot = 0.f; + float btot = 0.f; + float norm = 0.f; + float wt; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) - for (int j1 = 0; j1 < j + halfwin; j1++) { - wt = badpix[i1 * width + j1]; - atot += wt * sraa[i1][j1]; - btot += wt * srbb[i1][j1]; - norm += wt; - } + for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int j1 = 0; j1 < j + halfwin; j1++) { + wt = badpix[i1 * width + j1]; + atot += wt * sraa[i1][j1]; + btot += wt * srbb[i1][j1]; + norm += wt; + } - if(norm > 0.f) { - const float intera = atot / norm; - const float interb = atot / norm; - const float CC = sqrt(SQR(interb) + SQR(intera)); + if(norm > 0.f) { + const float intera = atot / norm; + const float interb = btot / norm; + const float CC = sqrt(SQR(interb) + SQR(intera)); - if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + if(CC < chrom) { + src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + src->C_p[i][j] = CC; + } } } } - } - for(; j < width - halfwin; j++) { +#ifdef __SSE2__ + vfloat threshfactorv = F2V(threshfactor); + vfloat chromv = F2V(chrom); + vfloat piBy180v = F2V(RT_PI_F_180); + for(; j < width - halfwin - 3; j+=4) { - if (badpix[i * width + j] < threshfactor) { - float atot = 0.f; - float btot = 0.f; - float norm = 0.f; - float wt; + vmask selMask = vmaskf_lt(LVFU(badpix[i * width + j]), threshfactorv); + if(_mm_movemask_ps((vfloat)selMask)) { + vfloat atotv = ZEROV; + vfloat btotv = ZEROV; + vfloat normv = ZEROV; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) - for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { - wt = badpix[i1 * width + j1]; - atot += wt * sraa[i1][j1]; - btot += wt * srbb[i1][j1]; - norm += wt; - } + for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { + vfloat wtv = LVFU(badpix[i1 * width + j1]); + atotv += wtv * LVFU(sraa[i1][j1]); + btotv += wtv * LVFU(srbb[i1][j1]); + normv += wtv; + } - if(norm > 0.f) { - const float intera = atot / norm; - const float interb = atot / norm; - const float CC = sqrt(SQR(interb) + SQR(intera)); + selMask = vandm(selMask, vmaskf_gt(normv, ZEROV)); + if(_mm_movemask_ps((vfloat)selMask)) { + vfloat interav = atotv / normv; + vfloat interbv = btotv / normv; + vfloat CCv = vsqrtf(SQRV(interbv) + SQRV(interav)); - if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + selMask = vandm(selMask, vmaskf_lt(CCv, chromv)); + if(_mm_movemask_ps((vfloat)selMask)) { + STVFU(src->h_p[i][j], xatan2f(interbv, interav) / piBy180v); + STVFU(src->C_p[i][j], CCv); + } } } } - } +#endif + for(; j < width - halfwin; j++) { - for(; j < width; j++) { + if (badpix[i * width + j] < threshfactor) { + float atot = 0.f; + float btot = 0.f; + float norm = 0.f; + float wt; - if (badpix[i * width + j] < threshfactor) { - float atot = 0.f; - float btot = 0.f; - float norm = 0.f; - float wt; + for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { + wt = badpix[i1 * width + j1]; + atot += wt * sraa[i1][j1]; + btot += wt * srbb[i1][j1]; + norm += wt; + } - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) - for (int j1 = j - halfwin + 1; j1 < width; j1++) { - wt = badpix[i1 * width + j1]; - atot += wt * sraa[i1][j1]; - btot += wt * srbb[i1][j1]; - norm += wt; + if(norm > 0.f) { + const float intera = atot / norm; + const float interb = btot / norm; + const float CC = sqrt(SQR(interb) + SQR(intera)); + + if(CC < chrom) { + src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + src->C_p[i][j] = CC; + } } + } + } - if(norm > 0.f) { - const float intera = atot / norm; - const float interb = atot / norm; - const float CC = sqrt(SQR(interb) + SQR(intera)); + for(; j < width; j++) { - if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) { - src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; - src->C_p[i][j] = CC; + if (badpix[i * width + j] < threshfactor) { + float atot = 0.f; + float btot = 0.f; + float norm = 0.f; + float wt; + + for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int j1 = j - halfwin + 1; j1 < width; j1++) { + wt = badpix[i1 * width + j1]; + atot += wt * sraa[i1][j1]; + btot += wt * srbb[i1][j1]; + norm += wt; + } + + if(norm > 0.f) { + const float intera = atot / norm; + const float interb = btot / norm; + const float CC = sqrt(SQR(interb) + SQR(intera)); + + if(CC < chrom) { + src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; + src->C_p[i][j] = CC; + } } } } @@ -994,7 +1031,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl int j = 0; for (; j < 2; j++) { float shfabs = fabs(src->L[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { @@ -1020,7 +1057,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #endif for (; j < width - 2; j++) { float shfabs = fabs(src->L[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { @@ -1032,7 +1069,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (; j < width; j++) { float shfabs = fabs(src->L[i][j] - tmL[i][j]); - float shmed = 0.0f; + float shmed = 0.f; for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 36c742c51..b8e783a89 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -1485,17 +1485,17 @@ void ImProcFunctions::ciecam_02 (CieImage* ncie, double adap, int pW, int pwb, L //if(params->dirpyrequalizer.enabled) if(execsharp) { if (params->dirpyrequalizer.enabled) { - if (params->dirpyrequalizer.gamutlab /*&& execsharp*/) { + if (params->dirpyrequalizer.gamutlab) { constexpr float artifact = 4.f; constexpr float chrom = 50.f; - constexpr int hotbad = 0; - ImProcFunctions::badpixcam (ncie, artifact, 5, 2, params->dirpyrequalizer.skinprotect, chrom, hotbad); //enabled remove artifacts for cbDL + const bool hotbad = params->dirpyrequalizer.skinprotect != 0.0; + ImProcFunctions::badpixcam (ncie, artifact, 5, 2, chrom, hotbad); //enabled remove artifacts for cbDL } } if (params->colorappearance.badpixsl > 0) if (execsharp) { int mode = params->colorappearance.badpixsl; - ImProcFunctions::badpixcam (ncie, 3.4, 5, mode, 0, 0, 1);//for bad pixels CIECAM + ImProcFunctions::badpixcam (ncie, 3.4, 5, mode, 0, true);//for bad pixels CIECAM } if (params->sharpenMicro.enabled)if (execsharp) { @@ -2905,13 +2905,13 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw //if(params->dirpyrequalizer.enabled) if(execsharp) { if (params->dirpyrequalizer.enabled) { - if (params->dirpyrequalizer.gamutlab /*&& execsharp*/) { //remove artifacts by gaussian blur - skin control + if (params->dirpyrequalizer.gamutlab) { //remove artifacts by gaussian blur - skin control constexpr float artifact = 4.f; - constexpr int hotbad = 0; constexpr float chrom = 50.f; + const bool hotbad = params->dirpyrequalizer.skinprotect != 0.0; lab->deleteLab(); - ImProcFunctions::badpixcam (ncie, artifact, 5, 2, params->dirpyrequalizer.skinprotect, chrom, hotbad); //enabled remove artifacts for cbDL + ImProcFunctions::badpixcam (ncie, artifact, 5, 2, chrom, hotbad); //enabled remove artifacts for cbDL lab->reallocLab(); } } @@ -2920,7 +2920,7 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw if (params->colorappearance.badpixsl > 0) if (execsharp) { int mode = params->colorappearance.badpixsl; lab->deleteLab(); - ImProcFunctions::badpixcam (ncie, 3.0, 10, mode, 0, 0, 1);//for bad pixels CIECAM + ImProcFunctions::badpixcam (ncie, 3.0, 10, mode, 0, true);//for bad pixels CIECAM lab->reallocLab(); } @@ -6282,10 +6282,10 @@ void ImProcFunctions::defringecam (CieImage* ncie) } } -void ImProcFunctions::badpixcam (CieImage* ncie, double rad, int thr, int mode, float skinprot, float chrom, int hotbad) +void ImProcFunctions::badpixcam (CieImage* ncie, double rad, int thr, int mode, float chrom, bool hotbad) { if (ncie->W >= 8 && ncie->H >= 8) { - Badpixelscam (ncie, rad, thr, mode, skinprot, chrom, hotbad); + Badpixelscam (ncie, rad, thr, mode, chrom, hotbad); } } diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index ac9370c98..ec74f6d9e 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -336,12 +336,12 @@ public: void idirpyr_eq_channelcam (float ** data_coarse, float ** data_fine, float ** buffer, int width, int height, int level, float multi[6], const double dirpyrThreshold, float ** l_a_h, float ** l_b_c, const double skinprot, float b_l, float t_l, float t_r); void defringe (LabImage* lab); void defringecam (CieImage* ncie); - void badpixcam (CieImage* ncie, double rad, int thr, int mode, float skinprot, float chrom, int hotbad); + void badpixcam (CieImage* ncie, double rad, int thr, int mode, float chrom, bool hotbad); void badpixlab (LabImage* lab, double rad, int thr, float skinprot, float chrom); void PF_correct_RT (LabImage * src, double radius, int thresh); void PF_correct_RTcam (CieImage * src, double radius, int thresh); - void Badpixelscam (CieImage * src, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad); + void Badpixelscam (CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad); void BadpixelsLab (LabImage * src, double radius, int thresh, float chrom); void ToneMapFattal02(Imagefloat *rgb);