diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index 9454bae7c..2b5a7e713 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -36,15 +36,13 @@ #define BENCHMARK #include "StopWatch.h" -using namespace std; - namespace rtengine { void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) { BENCHFUN - const int halfwin = ceil(2 * radius) + 1; + const int halfwin = std::ceil(2 * radius) + 1; std::unique_ptr chCurve; if (params->defringe.huecurve.size() && FlatCurveType(params->defringe.huecurve.at(0)) > FCT_Linear) { @@ -55,10 +53,10 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) const int width = src->W, height = src->H; //temporary array to store chromaticity - std::unique_ptr fringe(new float[width * height]); + const std::unique_ptr fringe(new float[width * height]); - const JaggedArray tmpa(width, height); - const JaggedArray tmpb(width, height); + JaggedArray tmpa(width, height); + JaggedArray tmpb(width, height); #ifdef _OPENMP #pragma omp parallel @@ -79,58 +77,58 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) #pragma omp for reduction(+:chromave) #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { #ifdef __SSE2__ // vectorized per row precalculation of the atan2 values if (chCurve) { int k = 0; - for(; k < width - 3; k += 4) { + for (; k < width - 3; k += 4) { STVFU(fringe[i * width + k], xatan2f(LVFU(src->b[i][k]), LVFU(src->a[i][k]))); } - for(; k < width; k++) { + for (; k < width; k++) { fringe[i * width + k] = xatan2f(src->b[i][k], src->a[i][k]); } } #endif - for(int j = 0; j < width; j++) { + for (int j = 0; j < width; j++) { if (chCurve) { #ifdef __SSE2__ // use the precalculated atan values - float HH = fringe[i * width + j]; + const float HH = fringe[i * width + j]; #else // no precalculated values without SSE => calculate - float HH = xatan2f(src->b[i][j], src->a[i][j]); + const float HH = xatan2f(src->b[i][j], src->a[i][j]); #endif - float chparam = chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f; //get C=f(H) + float chparam = chCurve->getVal((Color::huelab_to_huehsv2(HH))) - 0.5f; // get C=f(H) - if(chparam < 0.f) { + if (chparam < 0.f) { chparam *= 2.f; // increased action if chparam < 0 } chromaChfactor = SQR(1.f + chparam); } - float chroma = chromaChfactor * (SQR(src->a[i][j] - tmpa[i][j]) + SQR(src->b[i][j] - tmpb[i][j])); //modulate chroma function hue + const float chroma = chromaChfactor * (SQR(src->a[i][j] - tmpa[i][j]) + SQR(src->b[i][j] - tmpb[i][j])); // modulate chroma function hue chromave += chroma; fringe[i * width + j] = chroma; } } } - chromave /= (height * width); + chromave /= height * width; - if(chromave > 0.0) { + if (chromave > 0.0) { // now as chromave is calculated, we postprocess fringe to reduce the number of divisions in future #ifdef _OPENMP #pragma omp parallel for simd #endif - for(int j = 0; j < width * height; j++) { + for (int j = 0; j < width * height; j++) { fringe[j] = 1.f / (fringe[j] + chromave); } @@ -146,18 +144,18 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) #pragma omp parallel for schedule(dynamic,16) #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; - for(; j < halfwin - 1; j++) { + for (; j < halfwin - 1; j++) { //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = 0; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; @@ -168,16 +166,16 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) } } - for(; j < width - halfwin + 1; j++) { + for (; j < width - halfwin + 1; j++) { //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; @@ -188,16 +186,16 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) } } - for(; j < width; j++) { + for (; j < width; j++) { //test for pixel darker than some fraction of neighbourhood ave, near an edge, more saturated than average if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < width; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; @@ -214,7 +212,7 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, double radius, int thresh) void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh) { BENCHFUN - const int halfwin = ceil(2 * radius) + 1; + const int halfwin = std::ceil(2 * radius) + 1; std::unique_ptr chCurve; @@ -226,19 +224,19 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh const int width = src->W, height = src->H; //temporary array to store chromaticity - std::unique_ptr fringe(new float[width * height]); + const std::unique_ptr fringe(new float[width * height]); - float **sraa = src->h_p; // we use the src->h_p buffer to avoid memory allocation/deallocation and reduce memory pressure - float **srbb = src->C_p; // we use the src->C_p buffer to avoid memory allocation/deallocation and reduce memory pressure - const JaggedArray tmaa(width, height); - const JaggedArray tmbb(width, height); + float** const sraa = src->h_p; // we use the src->h_p buffer to avoid memory allocation/deallocation and reduce memory pressure + float** const srbb = src->C_p; // we use the src->C_p buffer to avoid memory allocation/deallocation and reduce memory pressure + JaggedArray tmaa(width, height); + JaggedArray tmbb(width, height); #ifdef _OPENMP #pragma omp parallel #endif { #ifdef __SSE2__ - vfloat piDiv180v = F2V(RT_PI_F_180); + const vfloat piDiv180v = F2V(RT_PI_F_180); #endif #ifdef _OPENMP #pragma omp for @@ -249,13 +247,13 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #ifdef __SSE2__ for (; j < width - 3; j += 4) { - vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); + const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); STVFU(sraa[i][j], LVFU(src->C_p[i][j]) * sincosvalv.y); STVFU(srbb[i][j], LVFU(src->C_p[i][j]) * sincosvalv.x); } #endif for (; j < width; j++) { - float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); + const float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); sraa[i][j] = src->C_p[i][j] * sincosval.y; srbb[i][j] = src->C_p[i][j] * sincosval.x; } @@ -272,7 +270,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #ifdef __SSE2__ - if(chCurve) { + if (chCurve) { // vectorized precalculation of the atan2 values #ifdef _OPENMP #pragma omp parallel @@ -282,13 +280,13 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #pragma omp for #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; - for(; j < width - 3; j += 4) { + for (; j < width - 3; j += 4) { STVFU(fringe[i * width + j], xatan2f(LVFU(srbb[i][j]), LVFU(sraa[i][j]))); } - for(; j < width; j++) { + for (; j < width; j++) { fringe[i * width + j] = xatan2f(srbb[i][j], sraa[i][j]); } } @@ -308,41 +306,41 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #pragma omp for reduction(+:chromave) #endif - for(int i = 0; i < height; i++ ) { - for(int j = 0; j < width; j++) { + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { if (chCurve) { #ifdef __SSE2__ // use the precalculated atan2 values - float HH = fringe[i * width + j]; + const float HH = fringe[i * width + j]; #else // no precalculated values without SSE => calculate - float HH = xatan2f(srbb[i][j], sraa[i][j]); + const float HH = xatan2f(srbb[i][j], sraa[i][j]); #endif float chparam = chCurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f; //get C=f(H) - if(chparam < 0.f) { + if (chparam < 0.f) { chparam *= 2.f; // increase action if chparam < 0 } chromaChfactor = SQR(1.f + chparam); } - float chroma = chromaChfactor * (SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j])); //modulate chroma function hue + const float chroma = chromaChfactor * (SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j])); //modulate chroma function hue chromave += chroma; fringe[i * width + j] = chroma; } } } - chromave /= (height * width); + chromave /= height * width; - if(chromave > 0.0) { + if (chromave > 0.0) { // now as chromave is calculated, we postprocess fringe to reduce the number of divisions in future #ifdef _OPENMP #pragma omp parallel for simd #endif - for(int j = 0; j < width * height; j++) { + for (int j = 0; j < width * height; j++) { fringe[j] = 1.f / (fringe[j] + chromave); } @@ -358,71 +356,71 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #pragma omp parallel for schedule(dynamic,16) #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; - for(; j < halfwin - 1; j++) { + for (; j < halfwin - 1; j++) { tmaa[i][j] = sraa[i][j]; tmbb[i][j] = srbb[i][j]; if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = 0; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { tmaa[i][j] = atot / norm; tmbb[i][j] = btot / norm; } } } - for(; j < width - halfwin + 1; j++) { + for (; j < width - halfwin + 1; j++) { tmaa[i][j] = sraa[i][j]; tmbb[i][j] = srbb[i][j]; if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { tmaa[i][j] = atot / norm; tmbb[i][j] = btot / norm; } } } - for(; j < width; j++) { + for (; j < width; j++) { tmaa[i][j] = sraa[i][j]; tmbb[i][j] = srbb[i][j]; if (fringe[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < width; j1++) { //neighbourhood average of pixels weighted by chrominance - float wt = fringe[i1 * width + j1]; + const float wt = fringe[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { tmaa[i][j] = atot / norm; tmbb[i][j] = btot / norm; } @@ -434,20 +432,20 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh #pragma omp parallel for #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; #ifdef __SSE2__ - for(; j < width - 3; j += 4) { - vfloat interav = LVFU(tmaa[i][j]); - vfloat interbv = LVFU(tmbb[i][j]); + for (; j < width - 3; j += 4) { + const vfloat interav = LVFU(tmaa[i][j]); + const vfloat interbv = LVFU(tmbb[i][j]); STVFU(src->h_p[i][j], xatan2f(interbv, interav) / F2V(RT_PI_F_180)); STVFU(src->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav))); } #endif - for(; j < width; j++) { - float intera = tmaa[i][j]; - float interb = tmbb[i][j]; + for (; j < width; j++) { + const float intera = tmaa[i][j]; + const float interb = tmbb[i][j]; src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; src->C_p[i][j] = sqrt(SQR(interb) + SQR(intera)); } @@ -458,7 +456,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad) { BENCHFUN - if(mode == 2 && radius < 0.25) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here + if (mode == 2 && radius < 0.25) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here return; } @@ -466,11 +464,11 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in constexpr float eps = 1.f; - const JaggedArray tmL(width, height); + JaggedArray tmL(width, height); - std::unique_ptr badpix(new float[width * height]); + const std::unique_ptr badpix(new float[width * height]); - if(radius >= 0.5) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here + if (radius >= 0.5) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here #ifdef _OPENMP #pragma omp parallel #endif @@ -488,8 +486,8 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #endif { #ifdef __SSE2__ - vfloat shthrv = F2V(shthr); - vfloat onev = F2V(1.f); + const vfloat shthrv = F2V(shthr); + const vfloat onev = F2V(1.f); #endif // __SSE2__ #ifdef _OPENMP #pragma omp for @@ -498,53 +496,57 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in for (int i = 0; i < height; i++) { int j = 0; for (; j < 2; j++) { - float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) - for (int j1 = 0; j1 <= j + 2; j1++ ) { - shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = 0; j1 <= j + 2; j1++) { + shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); } + } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } #ifdef __SSE2__ for (; j < width - 5; j += 4) { - vfloat shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j])); + const vfloat shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j])); vfloat shmedv = ZEROV; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) - for (int j1 = j - 2; j1 <= j + 2; j1++ ) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = j - 2; j1 <= j + 2; j1++) { shmedv += vabsf(LVFU(src->sh_p[i1][j1]) - LVFU(tmL[i1][j1])); } + } STVFU(badpix[i * width + j], vselfzero(vmaskf_gt(shfabsv, (shmedv - shfabsv) * shthrv), onev)); } #endif for (; j < width - 2; j++) { - float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) - for (int j1 = j - 2; j1 <= j + 2; j1++ ) { - shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = j - 2; j1 <= j + 2; j1++) { + shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); } + } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } for (; j < width; j++) { - float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->sh_p[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) - for (int j1 = j - 2; j1 < width; j1++ ) { - shmed += fabs(src->sh_p[i1][j1] - tmL[i1][j1]); + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = j - 2; j1 < width; j1++) { + shmed += std::fabs(src->sh_p[i1][j1] - tmL[i1][j1]); } + } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } } } @@ -559,12 +561,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) { - for (int j1 = 0; j1 <= j + 2; j1++ ) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = 0; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { sum += src->sh_p[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); shsum += dirsh * src->sh_p[i1][j1]; norm += dirsh; } @@ -582,12 +584,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) { - for (int j1 = j - 2; j1 <= j + 2; j1++ ) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = j - 2; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { sum += src->sh_p[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); shsum += dirsh * src->sh_p[i1][j1]; norm += dirsh; } @@ -595,7 +597,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } if (norm > 0.f) { src->sh_p[i][j] = shsum / norm; - } else if(tot > 0.f) { + } else if (tot > 0.f) { src->sh_p[i][j] = sum / tot; } } @@ -605,12 +607,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) { - for (int j1 = j - 2; j1 < width; j1++ ) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { + for (int j1 = j - 2; j1 < width; j1++) { if (!badpix[i1 * width + j1]) { sum += src->sh_p[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->sh_p[i1][j1] - src->sh_p[i][j]) + eps); shsum += dirsh * src->sh_p[i1][j1]; norm += dirsh; } @@ -618,7 +620,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } if (norm > 0.f) { src->sh_p[i][j] = shsum / norm; - } else if(tot > 0.f) { + } else if (tot > 0.f) { src->sh_p[i][j] = sum / tot; } } @@ -628,10 +630,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in // end luma badpixels - if(hotbad) { - - const JaggedArray sraa(width, height); - const JaggedArray srbb(width, height); + if (hotbad) { + JaggedArray sraa(width, height); + JaggedArray srbb(width, height); #ifdef _OPENMP #pragma omp parallel @@ -639,7 +640,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in { #ifdef __SSE2__ - vfloat piDiv180v = F2V(RT_PI_F_180); + const vfloat piDiv180v = F2V(RT_PI_F_180); #endif // __SSE2__ #ifdef _OPENMP #pragma omp for @@ -650,24 +651,23 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #ifdef __SSE2__ for (; j < width - 3; j += 4) { - vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); + const vfloat2 sincosvalv = xsincosf(piDiv180v * LVFU(src->h_p[i][j])); STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y); STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x); } #endif for (; j < width; j++) { - float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); + const float2 sincosval = xsincosf(RT_PI_F_180 * src->h_p[i][j]); sraa[i][j] = src->C_p[i][j] * sincosval.y; srbb[i][j] = src->C_p[i][j] * sincosval.x; } } } - float ** tmaa = tmL; // reuse tmL buffer - const JaggedArray tmbb(width, height); - - if(mode == 2) { //choice of gaussian blur + float** const tmaa = tmL; // reuse tmL buffer + JaggedArray tmbb(width, height); + if (mode == 2) { //choice of gaussian blur #ifdef _OPENMP #pragma omp parallel #endif @@ -677,7 +677,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in gaussianBlur(srbb, tmbb, width, height, radius); } - } else if(mode == 1) { //choice of median + } else if (mode == 1) { //choice of median #ifdef _OPENMP #pragma omp parallel #endif @@ -687,12 +687,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #endif for (int i = 0; i < height; i++) { - int ip = i < 2 ? i + 2 : i -2; - int in = i > height - 3 ? i - 2 : i + 2; + const int ip = i < 2 ? i + 2 : i - 2; + const int in = i > height - 3 ? i - 2 : i + 2; for (int j = 0; j < width; j++) { - int jp = j < 2 ? j + 2 : j -2; - int jn = j > width - 3 ? j - 2 : j + 2; + const int jp = j < 2 ? j + 2 : j -2; + const int jn = j > width - 3 ? j - 2 : j + 2; tmaa[i][j] = median(sraa[ip][jp], sraa[ip][j], sraa[ip][jn], sraa[i][jp], sraa[i][j], sraa[i][jn], sraa[in][jp], sraa[in][j], sraa[in][jn]); } @@ -702,12 +702,12 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #pragma omp for #endif for (int i = 0; i < height; i++) { - int ip = i < 2 ? i + 2 : i -2; - int in = i > height - 3 ? i - 2 : i + 2; + const int ip = i < 2 ? i + 2 : i - 2; + const int in = i > height - 3 ? i - 2 : i + 2; for (int j = 0; j < width; j++) { - int jp = j < 2 ? j + 2 : j -2; - int jn = j > width - 3 ? j - 2 : j + 2; + const int jp = j < 2 ? j + 2 : j -2; + const int jn = j > width - 3 ? j - 2 : j + 2; tmbb[i][j] = median(srbb[ip][jp], srbb[ip][j], srbb[ip][jn], srbb[i][jp], srbb[i][j], srbb[i][jn], srbb[in][jp], srbb[in][j], srbb[in][jn]); } @@ -721,71 +721,71 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in #pragma omp parallel for reduction(+:chrommed) #endif - for(int i = 0; i < height; i++ ) { - for(int j = 0; j < width; j++) { - float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + const float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]); chrommed += chroma; badpix[i * width + j] = chroma; } } - chrommed /= (height * width); + chrommed /= height * width; - if(chrommed > 0.0) { + if (chrommed > 0.0) { // now as chrommed is calculated, we postprocess badpix to reduce the number of divisions in future #ifdef _OPENMP #pragma omp parallel #endif { #ifdef __SSE2__ - vfloat chrommedv = F2V(chrommed); - vfloat onev = F2V(1.f); + const vfloat chrommedv = F2V(chrommed); + const vfloat onev = F2V(1.f); #endif #ifdef _OPENMP #pragma omp for #endif - for(int i = 0; i < height; i++) { + for (int i = 0; i < height; i++) { int j = 0; #ifdef __SSE2__ - for(; j < width - 3; j += 4) { + for (; j < width - 3; j += 4) { STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv)); } #endif - for(; j < width; j++) { + for (; j < width; j++) { badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed); } } } const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed); - const int halfwin = ceil(2 * radius) + 1; + const int halfwin = std::ceil(2 * radius) + 1; #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; - for(; j < halfwin; j++) { + for (; j < halfwin; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = 0; j1 < j + halfwin; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { const float intera = atot / norm; const float interb = btot / norm; const float CC = sqrt(SQR(interb) + SQR(intera)); - if(CC < chrom) { + if (CC < chrom) { src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; src->C_p[i][j] = CC; } @@ -794,31 +794,31 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } #ifdef __SSE2__ - vfloat threshfactorv = F2V(threshfactor); - vfloat chromv = F2V(chrom); - vfloat piDiv180v = F2V(RT_PI_F_180); - for(; j < width - halfwin - 3; j+=4) { + const vfloat threshfactorv = F2V(threshfactor); + const vfloat chromv = F2V(chrom); + const vfloat piDiv180v = F2V(RT_PI_F_180); + for (; j < width - halfwin - 3; j+=4) { vmask selMask = vmaskf_lt(LVFU(badpix[i * width + j]), threshfactorv); - if(_mm_movemask_ps((vfloat)selMask)) { + if (_mm_movemask_ps((vfloat)selMask)) { vfloat atotv = ZEROV, btotv = ZEROV, normv = ZEROV; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { - vfloat wtv = LVFU(badpix[i1 * width + j1]); + const vfloat wtv = LVFU(badpix[i1 * width + j1]); atotv += wtv * LVFU(sraa[i1][j1]); btotv += wtv * LVFU(srbb[i1][j1]); normv += wtv; } selMask = vandm(selMask, vmaskf_gt(normv, ZEROV)); - if(_mm_movemask_ps((vfloat)selMask)) { - vfloat interav = atotv / normv; - vfloat interbv = btotv / normv; - vfloat CCv = vsqrtf(SQRV(interbv) + SQRV(interav)); + if (_mm_movemask_ps((vfloat)selMask)) { + const vfloat interav = atotv / normv; + const vfloat interbv = btotv / normv; + const vfloat CCv = vsqrtf(SQRV(interbv) + SQRV(interav)); selMask = vandm(selMask, vmaskf_lt(CCv, chromv)); - if(_mm_movemask_ps((vfloat)selMask)) { + if (_mm_movemask_ps((vfloat)selMask)) { STVFU(src->h_p[i][j], vself(selMask, xatan2f(interbv, interav) / piDiv180v, LVFU(src->h_p[i][j]))); STVFU(src->C_p[i][j], vself(selMask, CCv, LVFU(src->C_p[i][j]))); } @@ -826,25 +826,25 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } } #endif - for(; j < width - halfwin; j++) { + for (; j < width - halfwin; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { const float intera = atot / norm; const float interb = btot / norm; const float CC = sqrt(SQR(interb) + SQR(intera)); - if(CC < chrom) { + if (CC < chrom) { src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; src->C_p[i][j] = CC; } @@ -852,25 +852,25 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in } } - for(; j < width; j++) { + for (; j < width; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) for (int j1 = j - halfwin + 1; j1 < width; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * sraa[i1][j1]; btot += wt * srbb[i1][j1]; norm += wt; } - if(norm > 0.f) { + if (norm > 0.f) { const float intera = atot / norm; const float interb = btot / norm; const float CC = sqrt(SQR(interb) + SQR(intera)); - if(CC < chrom) { + if (CC < chrom) { src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180; src->C_p[i][j] = CC; } @@ -886,21 +886,21 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl { BENCHFUN - if(radius < 0.25) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here + if (radius < 0.25) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here return; } - const int halfwin = ceil(2 * radius) + 1; + const int halfwin = std::ceil(2 * radius) + 1; const int width = src->W, height = src->H; constexpr float eps = 1.f; - const JaggedArray tmL(width, height); + JaggedArray tmL(width, height); - std::unique_ptr badpix(new float[width * height]); + const std::unique_ptr badpix(new float[width * height]); - if(radius >= 0.5) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here + if (radius >= 0.5) { // for gauss sigma less than 0.25 gaussianblur() just calls memcpy => nothing to do here #ifdef _OPENMP #pragma omp parallel @@ -919,8 +919,8 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #endif { #ifdef __SSE2__ - vfloat shthrv = F2V(shthr); - vfloat onev = F2V(1.f); + const vfloat shthrv = F2V(shthr); + const vfloat onev = F2V(1.f); #endif #ifdef _OPENMP #pragma omp for @@ -929,24 +929,24 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl for (int i = 0; i < height; i++) { int j = 0; for (; j < 2; j++) { - float shfabs = fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { - shmed += fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); } } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } #ifdef __SSE2__ for (; j < width - 5; j += 4) { - vfloat shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j])); + const vfloat shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j])); vfloat shmedv = ZEROV; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { shmedv += vabsf(LVFU(src->L[i1][j1]) - LVFU(tmL[i1][j1])); } @@ -955,27 +955,27 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl } #endif for (; j < width - 2; j++) { - float shfabs = fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { - shmed += fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); } } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } for (; j < width; j++) { - float shfabs = fabs(src->L[i][j] - tmL[i][j]); + const float shfabs = std::fabs(src->L[i][j] - tmL[i][j]); float shmed = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { - shmed += fabs(src->L[i1][j1] - tmL[i1][j1]); + shmed += std::fabs(src->L[i1][j1] - tmL[i1][j1]); } } - badpix[i * width + j] = (shfabs > ((shmed - shfabs) * shthr)); + badpix[i * width + j] = shfabs > ((shmed - shfabs) * shthr); } } } @@ -990,12 +990,12 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = 0; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { sum += src->L[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); shsum += dirsh * src->L[i1][j1]; norm += dirsh; } @@ -1003,7 +1003,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl } if (norm > 0.f) { src->L[i][j] = shsum / norm; - } else if(tot > 0.f) { + } else if (tot > 0.f) { src->L[i][j] = sum / tot; } } @@ -1013,12 +1013,12 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 <= j + 2; j1++) { if (!badpix[i1 * width + j1]) { sum += src->L[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); shsum += dirsh * src->L[i1][j1]; norm += dirsh; } @@ -1026,7 +1026,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl } if (norm > 0.f) { src->L[i][j] = shsum / norm; - } else if(tot > 0.f) { + } else if (tot > 0.f) { src->L[i][j] = sum / tot; } } @@ -1036,12 +1036,12 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl if (badpix[i * width + j]) { float norm = 0.f, shsum = 0.f, sum = 0.f, tot = 0.f; - for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) { + for (int i1 = std::max(0, i - 2); i1 <= std::min(i + 2, height - 1); i1++) { for (int j1 = j - 2; j1 < width; j1++) { if (!badpix[i1 * width + j1]) { sum += src->L[i1][j1]; tot += 1.f; - float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); + const float dirsh = 1.f / (SQR(src->L[i1][j1] - src->L[i][j]) + eps); shsum += dirsh * src->L[i1][j1]; norm += dirsh; } @@ -1049,7 +1049,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl } if (norm > 0.f) { src->L[i][j] = shsum / norm; - } else if(tot > 0.f) { + } else if (tot > 0.f) { src->L[i][j] = sum / tot; } } @@ -1059,8 +1059,8 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl // end luma badpixels - float ** tmaa = tmL; // reuse tmL buffer - const JaggedArray tmbb(width, height); + float** const tmaa = tmL; // reuse tmL buffer + JaggedArray tmbb(width, height); #ifdef _OPENMP #pragma omp parallel @@ -1078,17 +1078,17 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #pragma omp parallel for reduction(+:chrommed) #endif - for(int i = 0; i < height; i++ ) { - for(int j = 0; j < width; j++) { - float chroma = SQR(src->a[i][j] - tmaa[i][j]) + SQR(src->b[i][j] - tmbb[i][j]); + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + const float chroma = SQR(src->a[i][j] - tmaa[i][j]) + SQR(src->b[i][j] - tmbb[i][j]); chrommed += chroma; badpix[i * width + j] = chroma; } } - chrommed /= (height * width); - if(chrommed > 0.0) { + chrommed /= height * width; + if (chrommed > 0.0) { // now as chrommed is calculated, we postprocess badpix to reduce the number of divisions in future #ifdef _OPENMP @@ -1096,21 +1096,21 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #endif { #ifdef __SSE2__ - vfloat chrommedv = F2V(chrommed); - vfloat onev = F2V(1.f); + const vfloat chrommedv = F2V(chrommed); + const vfloat onev = F2V(1.f); #endif #ifdef _OPENMP #pragma omp for #endif - for(int i = 0; i < height; i++) { + for (int i = 0; i < height; i++) { int j = 0; #ifdef __SSE2__ - for(; j < width - 3; j += 4) { + for (; j < width - 3; j += 4) { STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv)); } #endif - for(; j < width; j++) { + for (; j < width; j++) { badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed); } } @@ -1125,22 +1125,21 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl #pragma omp parallel for schedule(dynamic,16) #endif - for(int i = 0; i < height; i++ ) { + for (int i = 0; i < height; i++) { int j = 0; - for(; j < halfwin; j++) { - + for (; j < halfwin; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) { + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = 0; j1 < j + halfwin; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; } } - if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) { + if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { src->a[i][j] = atot / norm; src->b[i][j] = btot / norm; } @@ -1148,66 +1147,65 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl } #ifdef __SSE2__ - vfloat chromv = F2V(chrom); - vfloat threshfactorv = F2V(threshfactor); - for(; j < width - halfwin - 3; j+=4) { - + const vfloat chromv = F2V(chrom); + const vfloat threshfactorv = F2V(threshfactor); + for (; j < width - halfwin - 3; j += 4) { vmask selMask = vmaskf_lt(LVFU(badpix[i * width + j]), threshfactorv); - if (_mm_movemask_ps((vfloat)selMask)) { + if (_mm_movemask_ps(reinterpret_cast(selMask))) { vfloat atotv = ZEROV, btotv = ZEROV, normv = ZEROV; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) { + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { - vfloat wtv = LVFU(badpix[i1 * width + j1]); + const vfloat wtv = LVFU(badpix[i1 * width + j1]); atotv += wtv * LVFU(src->a[i1][j1]); btotv += wtv * LVFU(src->b[i1][j1]); normv += wtv; } } selMask = vandm(selMask, vmaskf_lt(SQRV(atotv) + SQR(btotv), chromv * SQRV(normv))); - if(_mm_movemask_ps((vfloat)selMask)) { - vfloat aOrig = LVFU(src->a[i][j]); - vfloat bOrig = LVFU(src->b[i][j]); + if (_mm_movemask_ps(reinterpret_cast(selMask))) { + const vfloat aOrig = LVFU(src->a[i][j]); + const vfloat bOrig = LVFU(src->b[i][j]); STVFU(src->a[i][j], vself(selMask, atotv / normv, aOrig)); STVFU(src->b[i][j], vself(selMask, btotv / normv, bOrig)); } } } #endif - for(; j < width - halfwin; j++) { + for (; j < width - halfwin; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) { + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; } } - if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) { + if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { src->a[i][j] = atot / norm; src->b[i][j] = btot / norm; } } } - for(; j < width; j++) { + for (; j < width; j++) { if (badpix[i * width + j] < threshfactor) { float atot = 0.f, btot = 0.f, norm = 0.f; - for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++) { + for (int i1 = std::max(0, i - halfwin + 1); i1 < std::min(height, i + halfwin); i1++) { for (int j1 = j - halfwin + 1; j1 < width; j1++) { - float wt = badpix[i1 * width + j1]; + const float wt = badpix[i1 * width + j1]; atot += wt * src->a[i1][j1]; btot += wt * src->b[i1][j1]; norm += wt; } } - if(SQR(atot) + SQR(btot) < chrom * SQR(norm)) { + if (SQR(atot) + SQR(btot) < chrom * SQR(norm)) { src->a[i][j] = atot / norm; src->b[i][j] = btot / norm; }