From 4d6c3f2ce2438fcd9ecc7337f2329b493d2a050f Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 7 Jul 2019 15:29:24 +0200 Subject: [PATCH 1/9] Speedup for color propagation --- rtengine/hilite_recon.cc | 137 ++++++++++++++++++++++++-------------- rtengine/rawimagesource.h | 2 - 2 files changed, 86 insertions(+), 53 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index 699d42071..38da3ea26 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -28,14 +28,11 @@ #include "rawimagesource.h" #include "rt_math.h" #include "opthelper.h" -namespace rtengine -{ +#define BENCHMARK +#include "StopWatch.h" -extern const Settings* settings; - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int W, int box ) +namespace { +void boxblur2(float** src, float** dst, float** temp, int startY, int startX, int H, int W, int box ) { //box blur image channel; box size = 2*box+1 //horizontal blur @@ -45,23 +42,23 @@ void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int for (int row = 0; row < H; row++) { int len = box + 1; - temp[row][0] = src[row][0] / len; + temp[row][0] = src[row + startY][startX] / len; for (int j = 1; j <= box; j++) { - temp[row][0] += src[row][j] / len; + temp[row][0] += src[row + startY][j + startX] / len; } for (int col = 1; col <= box; col++) { - temp[row][col] = (temp[row][col - 1] * len + src[row][col + box]) / (len + 1); + temp[row][col] = (temp[row][col - 1] * len + src[row + startY][col + box + startX]) / (len + 1); len ++; } for (int col = box + 1; col < W - box; col++) { - temp[row][col] = temp[row][col - 1] + (src[row][col + box] - src[row][col - box - 1]) / len; + temp[row][col] = temp[row][col - 1] + (src[row + startY][col + box + startX] - src[row + startY][col - box - 1 + startX]) / len; } for (int col = W - box; col < W; col++) { - temp[row][col] = (temp[row][col - 1] * len - src[row][col - box - 1]) / (len - 1); + temp[row][col] = (temp[row][col - 1] * len - src[row + startY][col - box - 1 + startX]) / (len - 1); len --; } } @@ -210,7 +207,7 @@ void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int } -void RawImageSource::boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int box, int samp ) +void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int box, int samp ) { #ifdef _OPENMP @@ -386,8 +383,16 @@ void RawImageSource::boxblur_resamp(float **src, float **dst, float ** temp, int } +} +namespace rtengine +{ + +extern const Settings* settings; + + void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** blue) { + BENCHFUN double progress = 0.0; if (plistener) { @@ -477,28 +482,58 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b for (int c = 0; c < ColorCount; c++) { medFactor[c] = max(1.0f, max_f[c] / medpt) / (-blendpt); } + int minx = width - 1; + int maxx = 0; + int miny = height - 1; + int maxy = 0; - multi_array2D channelblur(width, height, 0, 48); - array2D temp(width, height); // allocate temporary buffer + #pragma omp parallel for reduction(min:minx,miny) reduction(max:maxx,maxy) schedule(dynamic, 16) + for (int i = 0; i < height; ++i) { + for (int j = 0; j< width; ++j) { + if(red[i][j] >= max_f[0] || green[i][j] >= max_f[1] || blue[i][j] >= max_f[2]) { + minx = std::min(minx, j); + maxx = std::max(maxx, j); + miny = std::min(miny, i); + maxy = std::max(maxy, i); + } + } + } + + std::cout << "minx : " << minx << std::endl; + std::cout << "maxx : " << maxx << std::endl; + std::cout << "miny : " << miny << std::endl; + std::cout << "maxy : " << maxy << std::endl; + + constexpr int blurBorder = 256; + minx = std::max(0, minx - blurBorder); + miny = std::max(0, miny - blurBorder); + maxx = std::min(width - 1, maxx + blurBorder); + maxy = std::min(height - 1, maxy + blurBorder); + const int blurWidth = maxx - minx + 1; + const int blurHeight = maxy - miny + 1; + + std::cout << "Corrected area reduced by factor: " << (((float)width * height) / (blurWidth * blurHeight)) << std::endl; + multi_array2D channelblur(blurWidth, blurHeight, 0, 48); + array2D temp(blurWidth, blurHeight); // allocate temporary buffer // blur RGB channels - boxblur2(red, channelblur[0], temp, height, width, 4); + boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, blurWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(green, channelblur[1], temp, height, width, 4); + boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, blurWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(blue, channelblur[2], temp, height, width, 4); - + boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, blurWidth, 4); + if(plistener) { progress += 0.05; plistener->setProgress(progress); @@ -509,9 +544,9 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for #endif - for(int i = 0; i < height; i++) - for(int j = 0; j < width; j++) { - channelblur[0][i][j] = fabsf(channelblur[0][i][j] - red[i][j]) + fabsf(channelblur[1][i][j] - green[i][j]) + fabsf(channelblur[2][i][j] - blue[i][j]); + for(int i = 0; i < blurHeight; i++) + for(int j = 0; j < blurWidth; j++) { + channelblur[0][i][j] = fabsf(channelblur[0][i][j] - red[i + miny][j + minx]) + fabsf(channelblur[1][i][j] - green[i + miny][j + minx]) + fabsf(channelblur[2][i][j] - blue[i + miny][j + minx]); } for (int c = 1; c < 3; c++) { @@ -523,7 +558,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b plistener->setProgress(progress); } - multi_array2D hilite_full(width, height, ARRAY2D_CLEAR_DATA, 32); + multi_array2D hilite_full(blurWidth, blurHeight, ARRAY2D_CLEAR_DATA, 32); if(plistener) { progress += 0.10; @@ -538,18 +573,18 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for reduction(+:hipass_sum,hipass_norm) schedule(dynamic,16) #endif - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { + for (int i = 0; i < blurHeight; i++) { + for (int j = 0; j < blurWidth; j++) { //if one or more channels is highlight but none are blown, add to highlight accumulator - if ((red[i][j] > thresh[0] || green[i][j] > thresh[1] || blue[i][j] > thresh[2]) && - (red[i][j] < max_f[0] && green[i][j] < max_f[1] && blue[i][j] < max_f[2])) { + if ((red[i + miny][j + minx] > thresh[0] || green[i + miny][j + minx] > thresh[1] || blue[i + miny][j + minx] > thresh[2]) && + (red[i + miny][j + minx] < max_f[0] && green[i + miny][j + minx] < max_f[1] && blue[i + miny][j + minx] < max_f[2])) { hipass_sum += channelblur[0][i][j]; hipass_norm ++; - hilite_full[0][i][j] = red[i][j]; - hilite_full[1][i][j] = green[i][j]; - hilite_full[2][i][j] = blue[i][j]; + hilite_full[0][i][j] = red[i + miny][j + minx]; + hilite_full[1][i][j] = green[i + miny][j + minx]; + hilite_full[2][i][j] = blue[i + miny][j + minx]; hilite_full[3][i][j] = 1.f; } @@ -563,10 +598,10 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b plistener->setProgress(progress); } - array2D hilite_full4(width, height); + array2D hilite_full4(blurWidth, blurHeight); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //blur highlight data - boxblur2(hilite_full[3], hilite_full4, temp, height, width, 1); + boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, blurWidth, 1); temp.free(); // free temporary buffer @@ -579,8 +614,8 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for schedule(dynamic,16) #endif - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { + for (int i = 0; i < blurHeight; i++) { + for (int j = 0; j < blurWidth; j++) { if (channelblur[0][i][j] > hipass_ave) { //too much variation hilite_full[0][i][j] = hilite_full[1][i][j] = hilite_full[2][i][j] = hilite_full[3][i][j] = 0.f; @@ -597,18 +632,18 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b channelblur[0].free(); //free up some memory hilite_full4.free(); //free up some memory - int hfh = (height - (height % pitch)) / pitch; - int hfw = (width - (width % pitch)) / pitch; + int hfh = (blurHeight - (blurHeight % pitch)) / pitch; + int hfw = (blurWidth - (blurWidth % pitch)) / pitch; multi_array2D hilite(hfw + 1, hfh + 1, ARRAY2D_CLEAR_DATA, 48); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // blur and resample highlight data; range=size of blur, pitch=sample spacing - array2D temp2((width / pitch) + ((width % pitch) == 0 ? 0 : 1), height); + array2D temp2((blurWidth / pitch) + ((blurWidth % pitch) == 0 ? 0 : 1), blurHeight); for (int m = 0; m < 4; m++) { - boxblur_resamp(hilite_full[m], hilite[m], temp2, height, width, range, pitch); + boxblur_resamp(hilite_full[m], hilite[m], temp2, blurHeight, blurWidth, range, pitch); if(plistener) { progress += 0.05; @@ -953,12 +988,12 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for schedule(dynamic,16) #endif - for (int i = 0; i < height; i++) { + for (int i = 0; i < blurHeight; i++) { int i1 = min((i - (i % pitch)) / pitch, hfh - 1); - for (int j = 0; j < width; j++) { + for (int j = 0; j < blurWidth; j++) { - float pixel[3] = {red[i][j], green[i][j], blue[i][j]}; + float pixel[3] = {red[i + miny][j + minx], green[i + miny][j + minx], blue[i + miny][j + minx]}; if (pixel[0] < max_f[0] && pixel[1] < max_f[1] && pixel[2] < max_f[2]) { continue; //pixel not clipped @@ -1109,36 +1144,36 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b float Y = (0.299 * clipfix[0] + 0.587 * clipfix[1] + 0.114 * clipfix[2]); float factor = whitept / Y; - red[i][j] = clipfix[0] * factor; - green[i][j] = clipfix[1] * factor; - blue[i][j] = clipfix[2] * factor; + red[i + miny][j + minx] = clipfix[0] * factor; + green[i + miny][j + minx] = clipfix[1] * factor; + blue[i + miny][j + minx] = clipfix[2] * factor; } else {//some channels clipped float notclipped[3] = {pixel[0] <= max_f[0] ? 1.f : 0.f, pixel[1] <= max_f[1] ? 1.f : 0.f, pixel[2] <= max_f[2] ? 1.f : 0.f}; if (notclipped[0] == 0.f) { //red clipped - red[i][j] = max(red[i][j], (clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) / + red[i + miny][j + minx] = max(red[i + miny][j + minx], (clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) / (notclipped[1] * clipfix[1] + notclipped[2] * clipfix[2] + epsilon)))); } if (notclipped[1] == 0.f) { //green clipped - green[i][j] = max(green[i][j], (clipfix[1] * ((notclipped[2] * pixel[2] + notclipped[0] * pixel[0]) / + green[i + miny][j + minx] = max(green[i + miny][j + minx], (clipfix[1] * ((notclipped[2] * pixel[2] + notclipped[0] * pixel[0]) / (notclipped[2] * clipfix[2] + notclipped[0] * clipfix[0] + epsilon)))); } if (notclipped[2] == 0.f) { //blue clipped - blue[i][j] = max(blue[i][j], (clipfix[2] * ((notclipped[0] * pixel[0] + notclipped[1] * pixel[1]) / + blue[i + miny][j + minx] = max(blue[i + miny][j + minx], (clipfix[2] * ((notclipped[0] * pixel[0] + notclipped[1] * pixel[1]) / (notclipped[0] * clipfix[0] + notclipped[1] * clipfix[1] + epsilon)))); } } - Y = (0.299 * red[i][j] + 0.587 * green[i][j] + 0.114 * blue[i][j]); + Y = (0.299 * red[i + miny][j + minx] + 0.587 * green[i + miny][j + minx] + 0.114 * blue[i + miny][j + minx]); if (Y > whitept) { float factor = whitept / Y; - red[i][j] *= factor; - green[i][j] *= factor; - blue[i][j] *= factor; + red[i + miny][j + minx] *= factor; + green[i + miny][j + minx] *= factor; + blue[i + miny][j + minx] *= factor; } } } diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index 4c7b0ba21..64018f354 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -195,8 +195,6 @@ public: } static void inverse33 (const double (*coeff)[3], double (*icoeff)[3]); - void boxblur2(float** src, float** dst, float** temp, int H, int W, int box ); - void boxblur_resamp(float **src, float **dst, float** temp, int H, int W, int box, int samp ); void MSR(float** luminance, float **originalLuminance, float **exLuminance, LUTf & mapcurve, bool &mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax); void HLRecovery_inpaint (float** red, float** green, float** blue) override; static void HLRecovery_Luminance (float* rin, float* gin, float* bin, float* rout, float* gout, float* bout, int width, float maxval); From 54fdbe41ea361467ff90104e7183fc38aad42efb Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 8 Jul 2019 23:57:24 +0200 Subject: [PATCH 2/9] dump SSE code in boxblur2 because new code is faster with auto-vectorization, also pad bufferwidth for boxblur2 to a multiple of 16 --- rtengine/hilite_recon.cc | 189 +++++++++++---------------------------- 1 file changed, 52 insertions(+), 137 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index 38da3ea26..b7c4d349c 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -22,6 +22,10 @@ // //////////////////////////////////////////////////////////////// +#ifndef NDEBUG +#include +#endif + #include #include #include "array2D.h" @@ -32,8 +36,11 @@ #include "StopWatch.h" namespace { -void boxblur2(float** src, float** dst, float** temp, int startY, int startX, int H, int W, int box ) +void boxblur2(const float* const* src, float** dst, float** temp, int startY, int startX, int H, int W, int box) { + constexpr int numCols = 16; + assert((W % numCols) == 0); + //box blur image channel; box size = 2*box+1 //horizontal blur #ifdef _OPENMP @@ -63,148 +70,52 @@ void boxblur2(float** src, float** dst, float** temp, int startY, int startX, in } } -#ifdef __SSE2__ //vertical blur #ifdef _OPENMP #pragma omp parallel #endif { - float len = box + 1; - vfloat lenv = F2V( len ); - vfloat lenp1v = F2V( len + 1.0f ); - vfloat onev = F2V( 1.0f ); - vfloat tempv, temp2v; + float tempvalN[numCols] ALIGNED64; #ifdef _OPENMP - #pragma omp for nowait + #pragma omp for #endif - - for (int col = 0; col < W - 7; col += 8) { - tempv = LVFU(temp[0][col]) / lenv; - temp2v = LVFU(temp[0][col + 4]) / lenv; - + for (int col = 0; col < W - numCols + 1; col += numCols) { + float len = box + 1; + for(int n = 0; n < numCols; n++) { + tempvalN[n] = temp[0][col + n] / len; + } for (int i = 1; i <= box; i++) { - tempv = tempv + LVFU(temp[i][col]) / lenv; - temp2v = temp2v + LVFU(temp[i][col + 4]) / lenv; + for(int n = 0; n < numCols; n++) { + tempvalN[n] += temp[i][col + n] / len; + } + } + for(int n = 0; n < numCols; n++) { + dst[0][col + n] = tempvalN[n]; } - - _mm_storeu_ps( &dst[0][col], tempv); - _mm_storeu_ps( &dst[0][col + 4], temp2v); - for (int row = 1; row <= box; row++) { - tempv = (tempv * lenv + LVFU(temp[(row + box)][col])) / lenp1v; - temp2v = (temp2v * lenv + LVFU(temp[(row + box)][col + 4])) / lenp1v; - _mm_storeu_ps( &dst[row][col], tempv); - _mm_storeu_ps( &dst[row][col + 4], temp2v); - lenv = lenp1v; - lenp1v = lenp1v + onev; + for(int n = 0; n < numCols; n++) { + tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1); + dst[row][col + n] = tempvalN[n]; + } + len ++; } - + const float rlen = 1.f / len; for (int row = box + 1; row < H - box; row++) { - tempv = tempv + (LVFU(temp[(row + box)][col]) - LVFU(temp[(row - box - 1)][col])) / lenv; - temp2v = temp2v + (LVFU(temp[(row + box)][col + 4]) - LVFU(temp[(row - box - 1)][col + 4])) / lenv; - _mm_storeu_ps( &dst[row][col], tempv); - _mm_storeu_ps( &dst[row][col + 4], temp2v); + for(int n = 0; n < numCols; n++) { + tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; + dst[row][col + n] = tempvalN[n]; + } } for (int row = H - box; row < H; row++) { - lenp1v = lenv; - lenv = lenv - onev; - tempv = (tempv * lenp1v - LVFU(temp[(row - box - 1)][col])) / lenv; - temp2v = (temp2v * lenp1v - LVFU(temp[(row - box - 1)][col + 4])) / lenv; - _mm_storeu_ps( &dst[row][col], tempv ); - _mm_storeu_ps( &dst[row][col + 4], temp2v ); - } - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - for (int col = W - (W % 8); col < W - 3; col += 4) { - tempv = LVFU(temp[0][col]) / lenv; - - for (int i = 1; i <= box; i++) { - tempv = tempv + LVFU(temp[i][col]) / lenv; - } - - _mm_storeu_ps( &dst[0][col], tempv); - - for (int row = 1; row <= box; row++) { - tempv = (tempv * lenv + LVFU(temp[(row + box)][col])) / lenp1v; - _mm_storeu_ps( &dst[row][col], tempv); - lenv = lenp1v; - lenp1v = lenp1v + onev; - } - - for (int row = box + 1; row < H - box; row++) { - tempv = tempv + (LVFU(temp[(row + box)][col]) - LVFU(temp[(row - box - 1)][col])) / lenv; - _mm_storeu_ps( &dst[row][col], tempv); - } - - for (int row = H - box; row < H; row++) { - lenp1v = lenv; - lenv = lenv - onev; - tempv = (tempv * lenp1v - LVFU(temp[(row - box - 1)][col])) / lenv; - _mm_storeu_ps( &dst[row][col], tempv ); - } - } - - for (int col = W - (W % 4); col < W; col++) { - int len = box + 1; - dst[0][col] = temp[0][col] / len; - - for (int i = 1; i <= box; i++) { - dst[0][col] += temp[i][col] / len; - } - - for (int row = 1; row <= box; row++) { - dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + box)][col]) / (len + 1); - len ++; - } - - for (int row = box + 1; row < H - box; row++) { - dst[row][col] = dst[(row - 1)][col] + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len; - } - - for (int row = H - box; row < H; row++) { - dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - box - 1)][col]) / (len - 1); - len --; + for(int n = 0; n < numCols; n++) { + tempvalN[n] = (dst[(row - 1)][col + n] * len - temp[(row - box - 1)][col + n]) / (len - 1); + dst[row][col + n] = tempvalN[n]; } + len --; } } } - -#else - //vertical blur -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int col = 0; col < W; col++) { - int len = box + 1; - dst[0][col] = temp[0][col] / len; - - for (int i = 1; i <= box; i++) { - dst[0][col] += temp[i][col] / len; - } - - for (int row = 1; row <= box; row++) { - dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + box)][col]) / (len + 1); - len ++; - } - - for (int row = box + 1; row < H - box; row++) { - dst[row][col] = dst[(row - 1)][col] + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len; - } - - for (int row = H - box; row < H; row++) { - dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - box - 1)][col]) / (len - 1); - len --; - } - } - -#endif - } void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int box, int samp ) @@ -263,19 +174,19 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b } } - static const int numCols = 8; // process numCols columns at once for better L1 CPU cache usage + constexpr int numCols = 8; // process numCols columns at once for better L1 CPU cache usage #ifdef _OPENMP #pragma omp parallel #endif { - float tempvalN[numCols] ALIGNED16; + float tempvalN[numCols] ALIGNED64; #ifdef _OPENMP #pragma omp for nowait #endif //vertical blur for (int col = 0; col < (W / samp) - (numCols - 1); col += numCols) { - int len = box + 1; + float len = box + 1; for(int n = 0; n < numCols; n++) { tempvalN[n] = temp[0][col + n] / len; @@ -304,10 +215,10 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b len ++; } - + const float rlen = 1.f / len; for (int row = box + 1; row < H - box; row++) { for(int n = 0; n < numCols; n++) { - tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) / len; + tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; } if(row % samp == 0) { @@ -511,28 +422,32 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b maxy = std::min(height - 1, maxy + blurBorder); const int blurWidth = maxx - minx + 1; const int blurHeight = maxy - miny + 1; + const int bufferWidth = blurWidth + ((16 - (blurWidth % 16)) & 15); + + std::cout << "blurWidth : " << blurWidth << std::endl; + std::cout << "bufferWidth : " << bufferWidth << std::endl; std::cout << "Corrected area reduced by factor: " << (((float)width * height) / (blurWidth * blurHeight)) << std::endl; - multi_array2D channelblur(blurWidth, blurHeight, 0, 48); - array2D temp(blurWidth, blurHeight); // allocate temporary buffer + multi_array2D channelblur(bufferWidth, blurHeight, 0, 48); + array2D temp(bufferWidth, blurHeight); // allocate temporary buffer // blur RGB channels - boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, blurWidth, 4); + boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, bufferWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, blurWidth, 4); + boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, bufferWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, blurWidth, 4); + boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, bufferWidth, 4); if(plistener) { progress += 0.05; @@ -558,7 +473,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b plistener->setProgress(progress); } - multi_array2D hilite_full(blurWidth, blurHeight, ARRAY2D_CLEAR_DATA, 32); + multi_array2D hilite_full(bufferWidth, blurHeight, ARRAY2D_CLEAR_DATA, 32); if(plistener) { progress += 0.10; @@ -598,10 +513,10 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b plistener->setProgress(progress); } - array2D hilite_full4(blurWidth, blurHeight); + array2D hilite_full4(bufferWidth, blurHeight); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //blur highlight data - boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, blurWidth, 1); + boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, bufferWidth, 1); temp.free(); // free temporary buffer From 4f73e5bb3cb4cbdd5749a8b7614319b65567cee8 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 9 Jul 2019 00:20:04 +0200 Subject: [PATCH 3/9] Fix oob access in last commit --- rtengine/hilite_recon.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index b7c4d349c..e78bd7365 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -36,10 +36,10 @@ #include "StopWatch.h" namespace { -void boxblur2(const float* const* src, float** dst, float** temp, int startY, int startX, int H, int W, int box) +void boxblur2(const float* const* src, float** dst, float** temp, int startY, int startX, int H, int W, int bufferW, int box) { constexpr int numCols = 16; - assert((W % numCols) == 0); + assert((bufferW % numCols) == 0); //box blur image channel; box size = 2*box+1 //horizontal blur @@ -79,7 +79,7 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in #ifdef _OPENMP #pragma omp for #endif - for (int col = 0; col < W - numCols + 1; col += numCols) { + for (int col = 0; col < bufferW - numCols + 1; col += numCols) { float len = box + 1; for(int n = 0; n < numCols; n++) { tempvalN[n] = temp[0][col + n] / len; @@ -433,21 +433,21 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b // blur RGB channels - boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, bufferWidth, 4); + boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, bufferWidth, 4); + boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); if(plistener) { progress += 0.05; plistener->setProgress(progress); } - boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, bufferWidth, 4); + boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); if(plistener) { progress += 0.05; @@ -516,7 +516,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b array2D hilite_full4(bufferWidth, blurHeight); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //blur highlight data - boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, bufferWidth, 1); + boxblur2(hilite_full[3], hilite_full4, temp, 0, 0, blurHeight, blurWidth, bufferWidth, 1); temp.free(); // free temporary buffer From fe43bf1bf2df00d51826093cd09269c03a7ece03 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 9 Jul 2019 00:40:25 +0200 Subject: [PATCH 4/9] color propagation: use up to 4 cores where previously only up to 3 cores were used --- rtengine/hilite_recon.cc | 313 +++++++++++++++++++++------------------ 1 file changed, 169 insertions(+), 144 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index e78bd7365..a726481dd 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -617,192 +617,217 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel +#endif + { +#ifdef _OPENMP + #pragma omp for nowait #endif - for (int c = 0; c < 3; c++) { - for (int j = 1; j < hfw - 1; j++) { - for (int i = 2; i < hfh - 2; i++) { - //from left - if (hilite[3][i][j] > epsilon) { - hilite_dir0[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; - } else { - hilite_dir0[c][j][i] = 0.1f * ((hilite_dir0[0 + c][j - 1][i - 2] + hilite_dir0[0 + c][j - 1][i - 1] + hilite_dir0[0 + c][j - 1][i] + hilite_dir0[0 + c][j - 1][i + 1] + hilite_dir0[0 + c][j - 1][i + 2]) / - (hilite_dir0[0 + 3][j - 1][i - 2] + hilite_dir0[0 + 3][j - 1][i - 1] + hilite_dir0[0 + 3][j - 1][i] + hilite_dir0[0 + 3][j - 1][i + 1] + hilite_dir0[0 + 3][j - 1][i + 2] + epsilon)); + for (int c = 0; c < 3; c++) { + for (int j = 1; j < hfw - 1; j++) { + for (int i = 2; i < hfh - 2; i++) { + //from left + if (hilite[3][i][j] > epsilon) { + hilite_dir0[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; + } else { + hilite_dir0[c][j][i] = 0.1f * ((hilite_dir0[0 + c][j - 1][i - 2] + hilite_dir0[0 + c][j - 1][i - 1] + hilite_dir0[0 + c][j - 1][i] + hilite_dir0[0 + c][j - 1][i + 1] + hilite_dir0[0 + c][j - 1][i + 2]) / + (hilite_dir0[0 + 3][j - 1][i - 2] + hilite_dir0[0 + 3][j - 1][i - 1] + hilite_dir0[0 + 3][j - 1][i] + hilite_dir0[0 + 3][j - 1][i + 1] + hilite_dir0[0 + 3][j - 1][i + 2] + epsilon)); + } + } + + if(hilite[3][2][j] <= epsilon) { + hilite_dir[0 + c][0][j] = hilite_dir0[c][j][2]; + } + + if(hilite[3][3][j] <= epsilon) { + hilite_dir[0 + c][1][j] = hilite_dir0[c][j][3]; + } + + if(hilite[3][hfh - 3][j] <= epsilon) { + hilite_dir[4 + c][hfh - 1][j] = hilite_dir0[c][j][hfh - 3]; + } + + if(hilite[3][hfh - 4][j] <= epsilon) { + hilite_dir[4 + c][hfh - 2][j] = hilite_dir0[c][j][hfh - 4]; } } - if(hilite[3][2][j] <= epsilon) { - hilite_dir[0 + c][0][j] = hilite_dir0[c][j][2]; - } - - if(hilite[3][3][j] <= epsilon) { - hilite_dir[0 + c][1][j] = hilite_dir0[c][j][3]; - } - - if(hilite[3][hfh - 3][j] <= epsilon) { - hilite_dir[4 + c][hfh - 1][j] = hilite_dir0[c][j][hfh - 3]; - } - - if(hilite[3][hfh - 4][j] <= epsilon) { - hilite_dir[4 + c][hfh - 2][j] = hilite_dir0[c][j][hfh - 4]; + for (int i = 2; i < hfh - 2; i++) { + if(hilite[3][i][hfw - 2] <= epsilon) { + hilite_dir4[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i]; + } } } - for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][hfw - 2] <= epsilon) { - hilite_dir4[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i]; +#ifdef _OPENMP + #pragma omp single +#endif + { + for (int j = hfw - 2; j > 0; j--) { + for (int i = 2; i < hfh - 2; i++) { + //from right + if (hilite[3][i][j] > epsilon) { + hilite_dir4[3][j][i] = 1.f; + } else { + hilite_dir4[3][j][i] = (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)]) == 0.f ? 0.f : 0.1f; + } + } + + if(hilite[3][2][j] <= epsilon) { + hilite_dir[0 + 3][0][j] += hilite_dir4[3][j][2]; + } + + if(hilite[3][hfh - 3][j] <= epsilon) { + hilite_dir[4 + 3][hfh - 1][j] += hilite_dir4[3][j][hfh - 3]; + } + } + + for (int i = 2; i < hfh - 2; i++) { + if(hilite[3][i][0] <= epsilon) { + hilite_dir[0 + 3][i - 2][0] += hilite_dir4[3][0][i]; + hilite_dir[4 + 3][i + 2][0] += hilite_dir4[3][0][i]; + } + + if(hilite[3][i][1] <= epsilon) { + hilite_dir[0 + 3][i - 2][1] += hilite_dir4[3][1][i]; + hilite_dir[4 + 3][i + 2][1] += hilite_dir4[3][1][i]; + } + + if(hilite[3][i][hfw - 2] <= epsilon) { + hilite_dir[0 + 3][i - 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; + hilite_dir[4 + 3][i + 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; + } } } } - if(plistener) { progress += 0.05; plistener->setProgress(progress); } - for (int j = hfw - 2; j > 0; j--) { - for (int i = 2; i < hfh - 2; i++) { - //from right - if (hilite[3][i][j] > epsilon) { - hilite_dir4[3][j][i] = 1.f; - } else { - hilite_dir4[3][j][i] = (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)]) == 0.f ? 0.f : 0.1f; - } - } - - if(hilite[3][2][j] <= epsilon) { - hilite_dir[0 + 3][0][j] += hilite_dir4[3][j][2]; - } - - if(hilite[3][hfh - 3][j] <= epsilon) { - hilite_dir[4 + 3][hfh - 1][j] += hilite_dir4[3][j][hfh - 3]; - } - } - - for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][0] <= epsilon) { - hilite_dir[0 + 3][i - 2][0] += hilite_dir4[3][0][i]; - hilite_dir[4 + 3][i + 2][0] += hilite_dir4[3][0][i]; - } - - if(hilite[3][i][1] <= epsilon) { - hilite_dir[0 + 3][i - 2][1] += hilite_dir4[3][1][i]; - hilite_dir[4 + 3][i + 2][1] += hilite_dir4[3][1][i]; - } - - if(hilite[3][i][hfw - 2] <= epsilon) { - hilite_dir[0 + 3][i - 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; - hilite_dir[4 + 3][i + 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; - } - } - #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel +#endif + { +#ifdef _OPENMP + #pragma omp for nowait #endif - for (int c = 0; c < 3; c++) { - for (int j = hfw - 2; j > 0; j--) { - for (int i = 2; i < hfh - 2; i++) { - //from right - if (hilite[3][i][j] > epsilon) { - hilite_dir4[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; - } else { - hilite_dir4[c][j][i] = 0.1 * ((hilite_dir4[c][(j + 1)][(i - 2)] + hilite_dir4[c][(j + 1)][(i - 1)] + hilite_dir4[c][(j + 1)][(i)] + hilite_dir4[c][(j + 1)][(i + 1)] + hilite_dir4[c][(j + 1)][(i + 2)]) / - (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)] + epsilon)); + for (int c = 0; c < 3; c++) { + for (int j = hfw - 2; j > 0; j--) { + for (int i = 2; i < hfh - 2; i++) { + //from right + if (hilite[3][i][j] > epsilon) { + hilite_dir4[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; + } else { + hilite_dir4[c][j][i] = 0.1f * ((hilite_dir4[c][(j + 1)][(i - 2)] + hilite_dir4[c][(j + 1)][(i - 1)] + hilite_dir4[c][(j + 1)][(i)] + hilite_dir4[c][(j + 1)][(i + 1)] + hilite_dir4[c][(j + 1)][(i + 2)]) / + (hilite_dir4[3][(j + 1)][(i - 2)] + hilite_dir4[3][(j + 1)][(i - 1)] + hilite_dir4[3][(j + 1)][(i)] + hilite_dir4[3][(j + 1)][(i + 1)] + hilite_dir4[3][(j + 1)][(i + 2)] + epsilon)); + } + } + + if(hilite[3][2][j] <= epsilon) { + hilite_dir[0 + c][0][j] += hilite_dir4[c][j][2]; + } + + if(hilite[3][hfh - 3][j] <= epsilon) { + hilite_dir[4 + c][hfh - 1][j] += hilite_dir4[c][j][hfh - 3]; } } - if(hilite[3][2][j] <= epsilon) { - hilite_dir[0 + c][0][j] += hilite_dir4[c][j][2]; - } + for (int i = 2; i < hfh - 2; i++) { + if(hilite[3][i][0] <= epsilon) { + hilite_dir[0 + c][i - 2][0] += hilite_dir4[c][0][i]; + hilite_dir[4 + c][i + 2][0] += hilite_dir4[c][0][i]; + } - if(hilite[3][hfh - 3][j] <= epsilon) { - hilite_dir[4 + c][hfh - 1][j] += hilite_dir4[c][j][hfh - 3]; + if(hilite[3][i][1] <= epsilon) { + hilite_dir[0 + c][i - 2][1] += hilite_dir4[c][1][i]; + hilite_dir[4 + c][i + 2][1] += hilite_dir4[c][1][i]; + } + + if(hilite[3][i][hfw - 2] <= epsilon) { + hilite_dir[0 + c][i - 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; + hilite_dir[4 + c][i + 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; + } } } - for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][0] <= epsilon) { - hilite_dir[0 + c][i - 2][0] += hilite_dir4[c][0][i]; - hilite_dir[4 + c][i + 2][0] += hilite_dir4[c][0][i]; - } - - if(hilite[3][i][1] <= epsilon) { - hilite_dir[0 + c][i - 2][1] += hilite_dir4[c][1][i]; - hilite_dir[4 + c][i + 2][1] += hilite_dir4[c][1][i]; - } - - if(hilite[3][i][hfw - 2] <= epsilon) { - hilite_dir[0 + c][i - 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; - hilite_dir[4 + c][i + 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; - } - } - } - - if(plistener) { - progress += 0.05; - plistener->setProgress(progress); - } - - - for (int i = 1; i < hfh - 1; i++) - for (int j = 2; j < hfw - 2; j++) { - //from top - if (hilite[3][i][j] > epsilon) { - hilite_dir[0 + 3][i][j] = 1.f; - } else { - hilite_dir[0 + 3][i][j] = (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2]) == 0.f ? 0.f : 0.1f; - } - } - - for (int j = 2; j < hfw - 2; j++) { - if(hilite[3][hfh - 2][j] <= epsilon) { - hilite_dir[4 + 3][hfh - 1][j] += hilite_dir[0 + 3][hfh - 2][j]; - } - } - #ifdef _OPENMP - #pragma omp parallel for + #pragma omp single #endif + { + for (int i = 1; i < hfh - 1; i++) + for (int j = 2; j < hfw - 2; j++) { + //from top + if (hilite[3][i][j] > epsilon) { + hilite_dir[0 + 3][i][j] = 1.f; + } else { + hilite_dir[0 + 3][i][j] = (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2]) == 0.f ? 0.f : 0.1f; + } + } - for (int c = 0; c < 3; c++) { - for (int i = 1; i < hfh - 1; i++) { for (int j = 2; j < hfw - 2; j++) { - //from top - if (hilite[3][i][j] > epsilon) { - hilite_dir[0 + c][i][j] = hilite[c][i][j] / hilite[3][i][j]; - } else { - hilite_dir[0 + c][i][j] = 0.1 * ((hilite_dir[0 + c][i - 1][j - 2] + hilite_dir[0 + c][i - 1][j - 1] + hilite_dir[0 + c][i - 1][j] + hilite_dir[0 + c][i - 1][j + 1] + hilite_dir[0 + c][i - 1][j + 2]) / - (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2] + epsilon)); + if(hilite[3][hfh - 2][j] <= epsilon) { + hilite_dir[4 + 3][hfh - 1][j] += hilite_dir[0 + 3][hfh - 2][j]; } } } - - for (int j = 2; j < hfw - 2; j++) { - if(hilite[3][hfh - 2][j] <= epsilon) { - hilite_dir[4 + c][hfh - 1][j] += hilite_dir[0 + c][hfh - 2][j]; - } - } } - if(plistener) { progress += 0.05; plistener->setProgress(progress); } - for (int i = hfh - 2; i > 0; i--) - for (int j = 2; j < hfw - 2; j++) { - //from bottom - if (hilite[3][i][j] > epsilon) { - hilite_dir[4 + 3][i][j] = 1.f; - } else { - hilite_dir[4 + 3][i][j] = (hilite_dir[4 + 3][(i + 1)][(j - 2)] + hilite_dir[4 + 3][(i + 1)][(j - 1)] + hilite_dir[4 + 3][(i + 1)][(j)] + hilite_dir[4 + 3][(i + 1)][(j + 1)] + hilite_dir[4 + 3][(i + 1)][(j + 2)]) == 0.f ? 0.f : 0.1f; +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef _OPENMP + #pragma omp for nowait +#endif + + for (int c = 0; c < 3; c++) { + for (int i = 1; i < hfh - 1; i++) { + for (int j = 2; j < hfw - 2; j++) { + //from top + if (hilite[3][i][j] > epsilon) { + hilite_dir[0 + c][i][j] = hilite[c][i][j] / hilite[3][i][j]; + } else { + hilite_dir[0 + c][i][j] = 0.1f * ((hilite_dir[0 + c][i - 1][j - 2] + hilite_dir[0 + c][i - 1][j - 1] + hilite_dir[0 + c][i - 1][j] + hilite_dir[0 + c][i - 1][j + 1] + hilite_dir[0 + c][i - 1][j + 2]) / + (hilite_dir[0 + 3][i - 1][j - 2] + hilite_dir[0 + 3][i - 1][j - 1] + hilite_dir[0 + 3][i - 1][j] + hilite_dir[0 + 3][i - 1][j + 1] + hilite_dir[0 + 3][i - 1][j + 2] + epsilon)); + } + } + } + + for (int j = 2; j < hfw - 2; j++) { + if(hilite[3][hfh - 2][j] <= epsilon) { + hilite_dir[4 + c][hfh - 1][j] += hilite_dir[0 + c][hfh - 2][j]; + } } } + +#ifdef _OPENMP + #pragma omp single +#endif + for (int i = hfh - 2; i > 0; i--) + for (int j = 2; j < hfw - 2; j++) { + //from bottom + if (hilite[3][i][j] > epsilon) { + hilite_dir[4 + 3][i][j] = 1.f; + } else { + hilite_dir[4 + 3][i][j] = (hilite_dir[4 + 3][(i + 1)][(j - 2)] + hilite_dir[4 + 3][(i + 1)][(j - 1)] + hilite_dir[4 + 3][(i + 1)][(j)] + hilite_dir[4 + 3][(i + 1)][(j + 1)] + hilite_dir[4 + 3][(i + 1)][(j + 2)]) == 0.f ? 0.f : 0.1f; + } + } + } + + if(plistener) { + progress += 0.05; + plistener->setProgress(progress); + } + #ifdef _OPENMP #pragma omp parallel for #endif @@ -814,7 +839,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b if (hilite[3][i][j] > epsilon) { hilite_dir[4 + c][i][j] = hilite[c][i][j] / hilite[3][i][j]; } else { - hilite_dir[4 + c][i][j] = 0.1 * ((hilite_dir[4 + c][(i + 1)][(j - 2)] + hilite_dir[4 + c][(i + 1)][(j - 1)] + hilite_dir[4 + c][(i + 1)][(j)] + hilite_dir[4 + c][(i + 1)][(j + 1)] + hilite_dir[4 + c][(i + 1)][(j + 2)]) / + hilite_dir[4 + c][i][j] = 0.1f * ((hilite_dir[4 + c][(i + 1)][(j - 2)] + hilite_dir[4 + c][(i + 1)][(j - 1)] + hilite_dir[4 + c][(i + 1)][(j)] + hilite_dir[4 + c][(i + 1)][(j + 1)] + hilite_dir[4 + c][(i + 1)][(j + 2)]) / (hilite_dir[4 + 3][(i + 1)][(j - 2)] + hilite_dir[4 + 3][(i + 1)][(j - 1)] + hilite_dir[4 + 3][(i + 1)][(j)] + hilite_dir[4 + 3][(i + 1)][(j + 1)] + hilite_dir[4 + 3][(i + 1)][(j + 2)] + epsilon)); } } From c56106beaee25a0bd370bc22e75de23258ae40f7 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 9 Jul 2019 17:39:53 +0200 Subject: [PATCH 5/9] color propagation: small speedup, also some code formating --- rtengine/hilite_recon.cc | 273 +++++++++++++++++++-------------------- 1 file changed, 133 insertions(+), 140 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index a726481dd..7fe527a78 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -22,9 +22,7 @@ // //////////////////////////////////////////////////////////////// -#ifndef NDEBUG #include -#endif #include #include @@ -81,19 +79,19 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in #endif for (int col = 0; col < bufferW - numCols + 1; col += numCols) { float len = box + 1; - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = temp[0][col + n] / len; } for (int i = 1; i <= box; i++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] += temp[i][col + n] / len; } } - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { dst[0][col + n] = tempvalN[n]; } for (int row = 1; row <= box; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1); dst[row][col + n] = tempvalN[n]; } @@ -101,14 +99,14 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in } const float rlen = 1.f / len; for (int row = box + 1; row < H - box; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; dst[row][col + n] = tempvalN[n]; } } for (int row = H - box; row < H; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = (dst[(row - 1)][col + n] * len - temp[(row - box - 1)][col + n]) / (len - 1); dst[row][col + n] = tempvalN[n]; } @@ -118,7 +116,7 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in } } -void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int box, int samp ) +void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, int W, int box, int samp ) { #ifdef _OPENMP @@ -145,7 +143,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int col = 1; col <= box; col++) { tempval = (tempval * len + src[row][col + box]) / (len + 1); - if(col % samp == 0) { + if (col % samp == 0) { temp[row][col / samp] = tempval; } @@ -157,7 +155,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int col = box + 1; col < W - box; col++) { tempval = tempval + (src[row][col + box] - src[row][col - box - 1]) * oneByLen; - if(col % samp == 0) { + if (col % samp == 0) { temp[row][col / samp] = tempval; } } @@ -165,7 +163,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int col = W - box; col < W; col++) { tempval = (tempval * len - src[row][col - box - 1]) / (len - 1); - if(col % samp == 0) { + if (col % samp == 0) { temp[row][col / samp] = tempval; } @@ -188,27 +186,27 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int col = 0; col < (W / samp) - (numCols - 1); col += numCols) { float len = box + 1; - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = temp[0][col + n] / len; } for (int i = 1; i <= box; i++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] += temp[i][col + n] / len; } } - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { dst[0][col + n] = tempvalN[n]; } for (int row = 1; row <= box; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1); } - if(row % samp == 0) { - for(int n = 0; n < numCols; n++) { + if (row % samp == 0) { + for (int n = 0; n < numCols; n++) { dst[row / samp][col + n] = tempvalN[n]; } } @@ -217,24 +215,24 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b } const float rlen = 1.f / len; for (int row = box + 1; row < H - box; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; } - if(row % samp == 0) { - for(int n = 0; n < numCols; n++) { + if (row % samp == 0) { + for (int n = 0; n < numCols; n++) { dst[row / samp][col + n] = tempvalN[n]; } } } for (int row = H - box; row < H; row++) { - for(int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; n++) { tempvalN[n] = (tempvalN[n] * len - temp[(row - box - 1)][col + n]) / (len - 1); } - if(row % samp == 0) { - for(int n = 0; n < numCols; n++) { + if (row % samp == 0) { + for (int n = 0; n < numCols; n++) { dst[row / samp][col + n] = tempvalN[n]; } } @@ -263,7 +261,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int row = 1; row <= box; row++) { tempval = (tempval * len + temp[(row + box)][col]) / (len + 1); - if(row % samp == 0) { + if (row % samp == 0) { dst[row / samp][col] = tempval; } @@ -273,7 +271,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int row = box + 1; row < H - box; row++) { tempval = tempval + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len; - if(row % samp == 0) { + if (row % samp == 0) { dst[row / samp][col] = tempval; } } @@ -281,7 +279,7 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b for (int row = H - box; row < H; row++) { tempval = (tempval * len - temp[(row - box - 1)][col]) / (len - 1); - if(row % samp == 0) { + if (row % samp == 0) { dst[row / samp][col] = tempval; } @@ -290,8 +288,6 @@ void boxblur_resamp(float **src, float **dst, float ** temp, int H, int W, int b } } } - - } } @@ -301,7 +297,7 @@ namespace rtengine extern const Settings* settings; -void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** blue) +void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blue) { BENCHFUN double progress = 0.0; @@ -311,8 +307,8 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b plistener->setProgress (progress); } - int height = H; - int width = W; + const int height = H; + const int width = W; constexpr int range = 2; constexpr int pitch = 4; @@ -330,20 +326,20 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b constexpr float itrans[ColorCount][ColorCount] = { { 1.f, 0.8660254f, -0.5f }, { 1.f, -0.8660254f, -0.5f }, { 1.f, 0.f, 1.f } }; - if(settings->verbose) - for(int c = 0; c < 3; c++) { + if (settings->verbose) + for (int c = 0; c < 3; c++) { printf("chmax[%d] : %f\tclmax[%d] : %f\tratio[%d] : %f\n", c, chmax[c], c, clmax[c], c, chmax[c] / clmax[c]); } float factor[3]; - for(int c = 0; c < ColorCount; c++) { + for (int c = 0; c < ColorCount; c++) { factor[c] = chmax[c] / clmax[c]; } float minFactor = min(factor[0], factor[1], factor[2]); - if(minFactor > 1.f) { // all 3 channels clipped + if (minFactor > 1.f) { // all 3 channels clipped // calculate clip factor per channel for (int c = 0; c < ColorCount; c++) { factor[c] /= minFactor; @@ -354,15 +350,15 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b float maxValNew = 0.f; for (int c = 0; c < ColorCount; c++) { - if(chmax[c] / factor[c] > maxValNew) { + if (chmax[c] / factor[c] > maxValNew) { maxValNew = chmax[c] / factor[c]; maxpos = c; } } - float clipFactor = clmax[maxpos] / maxValNew; + const float clipFactor = clmax[maxpos] / maxValNew; - if(clipFactor < maxpct) + if (clipFactor < maxpct) // if max clipFactor < maxpct (0.95) adjust per channel factors for (int c = 0; c < ColorCount; c++) { @@ -372,7 +368,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b factor[0] = factor[1] = factor[2] = 1.f; } - if(settings->verbose) + if (settings->verbose) for (int c = 0; c < ColorCount; c++) { printf("correction factor[%d] : %f\n", c, factor[c]); } @@ -384,10 +380,10 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b max_f[c] = chmax[c] * maxpct / factor[c]; } - float whitept = max(max_f[0], max_f[1], max_f[2]); - float clippt = min(max_f[0], max_f[1], max_f[2]); - float medpt = max_f[0] + max_f[1] + max_f[2] - whitept - clippt; - float blendpt = blendthresh * clippt; + const float whitept = max(max_f[0], max_f[1], max_f[2]); + const float clippt = min(max_f[0], max_f[1], max_f[2]); + const float medpt = max_f[0] + max_f[1] + max_f[2] - whitept - clippt; + const float blendpt = blendthresh * clippt; float medFactor[3]; for (int c = 0; c < ColorCount; c++) { @@ -401,7 +397,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for reduction(min:minx,miny) reduction(max:maxx,maxy) schedule(dynamic, 16) for (int i = 0; i < height; ++i) { for (int j = 0; j< width; ++j) { - if(red[i][j] >= max_f[0] || green[i][j] >= max_f[1] || blue[i][j] >= max_f[2]) { + if (red[i][j] >= max_f[0] || green[i][j] >= max_f[1] || blue[i][j] >= max_f[2]) { minx = std::min(minx, j); maxx = std::max(maxx, j); miny = std::min(miny, i); @@ -410,10 +406,10 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - std::cout << "minx : " << minx << std::endl; - std::cout << "maxx : " << maxx << std::endl; - std::cout << "miny : " << miny << std::endl; - std::cout << "maxy : " << maxy << std::endl; + if (plistener) { + progress += 0.05; + plistener->setProgress(progress); + } constexpr int blurBorder = 256; minx = std::max(0, minx - blurBorder); @@ -424,10 +420,16 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b const int blurHeight = maxy - miny + 1; const int bufferWidth = blurWidth + ((16 - (blurWidth % 16)) & 15); + std::cout << "minx : " << minx << std::endl; + std::cout << "maxx : " << maxx << std::endl; + std::cout << "miny : " << miny << std::endl; + std::cout << "maxy : " << maxy << std::endl; + std::cout << "blurWidth : " << blurWidth << std::endl; std::cout << "bufferWidth : " << bufferWidth << std::endl; - std::cout << "Corrected area reduced by factor: " << (((float)width * height) / (blurWidth * blurHeight)) << std::endl; + std::cout << "Corrected area reduced by factor: " << (((float)width * height) / (bufferWidth * blurHeight)) << std::endl; + std::cout << "Peak memory usage reduced from ~" << (30ul * ((size_t)width * (size_t)height)) / (1024*1024) << " Mb to ~" << (30ul * ((size_t)bufferWidth * (size_t)blurHeight)) / (1024*1024) << " Mb" << std::endl; multi_array2D channelblur(bufferWidth, blurHeight, 0, 48); array2D temp(bufferWidth, blurHeight); // allocate temporary buffer @@ -435,22 +437,22 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b boxblur2(red, channelblur[0], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); - if(plistener) { - progress += 0.05; + if (plistener) { + progress += 0.07; plistener->setProgress(progress); } boxblur2(green, channelblur[1], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); - if(plistener) { - progress += 0.05; + if (plistener) { + progress += 0.07; plistener->setProgress(progress); } boxblur2(blue, channelblur[2], temp, miny, minx, blurHeight, blurWidth, bufferWidth, 4); - if(plistener) { - progress += 0.05; + if (plistener) { + progress += 0.07; plistener->setProgress(progress); } @@ -459,8 +461,8 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b #pragma omp parallel for #endif - for(int i = 0; i < blurHeight; i++) - for(int j = 0; j < blurWidth; j++) { + for (int i = 0; i < blurHeight; i++) + for (int j = 0; j < blurWidth; j++) { channelblur[0][i][j] = fabsf(channelblur[0][i][j] - red[i + miny][j + minx]) + fabsf(channelblur[1][i][j] - green[i + miny][j + minx]) + fabsf(channelblur[2][i][j] - blue[i + miny][j + minx]); } @@ -468,15 +470,15 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b channelblur[c].free(); //free up some memory } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } multi_array2D hilite_full(bufferWidth, blurHeight, ARRAY2D_CLEAR_DATA, 32); - if(plistener) { - progress += 0.10; + if (plistener) { + progress += 0.05; plistener->setProgress(progress); } @@ -506,9 +508,9 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } }//end of filling highlight array - float hipass_ave = 2.f * hipass_sum / (hipass_norm + epsilon); + const float hipass_ave = 2.f * hipass_sum / (hipass_norm + epsilon); - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -520,8 +522,8 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b temp.free(); // free temporary buffer - if(plistener) { - progress += 0.05; + if (plistener) { + progress += 0.07; plistener->setProgress(progress); } @@ -560,7 +562,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b for (int m = 0; m < 4; m++) { boxblur_resamp(hilite_full[m], hilite[m], temp2, blurHeight, blurWidth, range, pitch); - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -577,7 +579,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b multi_array2D hilite_dir0(hfh, hfw, ARRAY2D_CLEAR_DATA, 64); multi_array2D hilite_dir4(hfh, hfw, ARRAY2D_CLEAR_DATA, 64); - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -594,25 +596,25 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(hilite[3][2][j] <= epsilon) { + if (hilite[3][2][j] <= epsilon) { hilite_dir[0 + 3][0][j] = hilite_dir0[3][j][2]; } - if(hilite[3][3][j] <= epsilon) { + if (hilite[3][3][j] <= epsilon) { hilite_dir[0 + 3][1][j] = hilite_dir0[3][j][3]; } - if(hilite[3][hfh - 3][j] <= epsilon) { + if (hilite[3][hfh - 3][j] <= epsilon) { hilite_dir[4 + 3][hfh - 1][j] = hilite_dir0[3][j][hfh - 3]; } - if(hilite[3][hfh - 4][j] <= epsilon) { + if (hilite[3][hfh - 4][j] <= epsilon) { hilite_dir[4 + 3][hfh - 2][j] = hilite_dir0[3][j][hfh - 4]; } } for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][hfw - 2] <= epsilon) { + if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir4[3][hfw - 1][i] = hilite_dir0[3][hfw - 2][i]; } } @@ -637,25 +639,25 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(hilite[3][2][j] <= epsilon) { + if (hilite[3][2][j] <= epsilon) { hilite_dir[0 + c][0][j] = hilite_dir0[c][j][2]; } - if(hilite[3][3][j] <= epsilon) { + if (hilite[3][3][j] <= epsilon) { hilite_dir[0 + c][1][j] = hilite_dir0[c][j][3]; } - if(hilite[3][hfh - 3][j] <= epsilon) { + if (hilite[3][hfh - 3][j] <= epsilon) { hilite_dir[4 + c][hfh - 1][j] = hilite_dir0[c][j][hfh - 3]; } - if(hilite[3][hfh - 4][j] <= epsilon) { + if (hilite[3][hfh - 4][j] <= epsilon) { hilite_dir[4 + c][hfh - 2][j] = hilite_dir0[c][j][hfh - 4]; } } for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][hfw - 2] <= epsilon) { + if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir4[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i]; } } @@ -675,34 +677,34 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(hilite[3][2][j] <= epsilon) { + if (hilite[3][2][j] <= epsilon) { hilite_dir[0 + 3][0][j] += hilite_dir4[3][j][2]; } - if(hilite[3][hfh - 3][j] <= epsilon) { + if (hilite[3][hfh - 3][j] <= epsilon) { hilite_dir[4 + 3][hfh - 1][j] += hilite_dir4[3][j][hfh - 3]; } } for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][0] <= epsilon) { + if (hilite[3][i][0] <= epsilon) { hilite_dir[0 + 3][i - 2][0] += hilite_dir4[3][0][i]; hilite_dir[4 + 3][i + 2][0] += hilite_dir4[3][0][i]; } - if(hilite[3][i][1] <= epsilon) { + if (hilite[3][i][1] <= epsilon) { hilite_dir[0 + 3][i - 2][1] += hilite_dir4[3][1][i]; hilite_dir[4 + 3][i + 2][1] += hilite_dir4[3][1][i]; } - if(hilite[3][i][hfw - 2] <= epsilon) { + if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir[0 + 3][i - 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; hilite_dir[4 + 3][i + 2][hfw - 2] += hilite_dir4[3][hfw - 2][i]; } } } } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -727,27 +729,27 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(hilite[3][2][j] <= epsilon) { + if (hilite[3][2][j] <= epsilon) { hilite_dir[0 + c][0][j] += hilite_dir4[c][j][2]; } - if(hilite[3][hfh - 3][j] <= epsilon) { + if (hilite[3][hfh - 3][j] <= epsilon) { hilite_dir[4 + c][hfh - 1][j] += hilite_dir4[c][j][hfh - 3]; } } for (int i = 2; i < hfh - 2; i++) { - if(hilite[3][i][0] <= epsilon) { + if (hilite[3][i][0] <= epsilon) { hilite_dir[0 + c][i - 2][0] += hilite_dir4[c][0][i]; hilite_dir[4 + c][i + 2][0] += hilite_dir4[c][0][i]; } - if(hilite[3][i][1] <= epsilon) { + if (hilite[3][i][1] <= epsilon) { hilite_dir[0 + c][i - 2][1] += hilite_dir4[c][1][i]; hilite_dir[4 + c][i + 2][1] += hilite_dir4[c][1][i]; } - if(hilite[3][i][hfw - 2] <= epsilon) { + if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir[0 + c][i - 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; hilite_dir[4 + c][i + 2][hfw - 2] += hilite_dir4[c][hfw - 2][i]; } @@ -769,13 +771,13 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } for (int j = 2; j < hfw - 2; j++) { - if(hilite[3][hfh - 2][j] <= epsilon) { + if (hilite[3][hfh - 2][j] <= epsilon) { hilite_dir[4 + 3][hfh - 1][j] += hilite_dir[0 + 3][hfh - 2][j]; } } } } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -802,7 +804,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } for (int j = 2; j < hfw - 2; j++) { - if(hilite[3][hfh - 2][j] <= epsilon) { + if (hilite[3][hfh - 2][j] <= epsilon) { hilite_dir[4 + c][hfh - 1][j] += hilite_dir[0 + c][hfh - 2][j]; } } @@ -823,7 +825,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -846,7 +848,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } @@ -911,38 +913,38 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b hilite_dir4[c][hfw - 1][hfh - 1] = hilite_dir4[c][hfw - 1][hfh - 2] = hilite_dir4[c][hfw - 2][hfh - 1] = hilite_dir4[c][hfw - 2][hfh - 2] = hilite_dir4[c][hfw - 3][hfh - 3]; } - if(plistener) { + if (plistener) { progress += 0.05; plistener->setProgress(progress); } //free up some memory - for(int c = 0; c < 4; c++) { + for (int c = 0; c < 4; c++) { hilite[c].free(); } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // now reconstruct clipped channels using color ratios - +StopWatch Stop1("last loop"); #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif for (int i = 0; i < blurHeight; i++) { - int i1 = min((i - (i % pitch)) / pitch, hfh - 1); + const int i1 = min((i - (i % pitch)) / pitch, hfh - 1); for (int j = 0; j < blurWidth; j++) { - float pixel[3] = {red[i + miny][j + minx], green[i + miny][j + minx], blue[i + miny][j + minx]}; + const float pixel[3] = {red[i + miny][j + minx], green[i + miny][j + minx], blue[i + miny][j + minx]}; if (pixel[0] < max_f[0] && pixel[1] < max_f[1] && pixel[2] < max_f[2]) { continue; //pixel not clipped } - int j1 = min((j - (j % pitch)) / pitch, hfw - 1); + const int j1 = min((j - (j % pitch)) / pitch, hfw - 1); //estimate recovered values using modified HLRecovery_blend algorithm - float rgb[ColorCount], rgb_blend[ColorCount] = {}, cam[2][ColorCount], lab[2][ColorCount], sum[2], chratio; + float rgb[ColorCount], rgb_blend[ColorCount] = {}, cam[2][ColorCount], lab[2][ColorCount], sum[2]; // Copy input pixel to rgb so it's easier to access in loops rgb[0] = pixel[0]; @@ -972,12 +974,10 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } - if(sum[0] == 0.f) { // avoid division by zero - sum[0] = epsilon; - } - - chratio = sqrtf(sum[1] / sum[0]); + // avoid division by zero + sum[0] = std::max(sum[0], epsilon); + const float chratio = sqrtf(sum[1] / sum[0]); // Apply ratio to lightness in lab space for (int c = 1; c < ColorCount; c++) { @@ -998,19 +998,18 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } // Copy converted pixel back - float rfrac = max(0.f, min(1.f, medFactor[0] * (pixel[0] - blendpt))); - float gfrac = max(0.f, min(1.f, medFactor[1] * (pixel[1] - blendpt))); - float bfrac = max(0.f, min(1.f, medFactor[2] * (pixel[2] - blendpt))); - if (pixel[0] > blendpt) { + const float rfrac = max(0.f, min(1.f, medFactor[0] * (pixel[0] - blendpt))); rgb_blend[0] = rfrac * rgb[0] + (1.f - rfrac) * pixel[0]; } if (pixel[1] > blendpt) { + const float gfrac = max(0.f, min(1.f, medFactor[1] * (pixel[1] - blendpt))); rgb_blend[1] = gfrac * rgb[1] + (1.f - gfrac) * pixel[1]; } if (pixel[2] > blendpt) { + const float bfrac = max(0.f, min(1.f, medFactor[2] * (pixel[2] - blendpt))); rgb_blend[2] = bfrac * rgb[2] + (1.f - bfrac) * pixel[2]; } @@ -1019,7 +1018,7 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b //there are clipped highlights //first, determine weighted average of unclipped extensions (weighting is by 'hue' proximity) - float totwt = 0.f; + bool totwt = false; float clipfix[3] = {0.f, 0.f, 0.f}; float Y = epsilon + rgb_blend[0] + rgb_blend[1] + rgb_blend[2]; @@ -1031,25 +1030,23 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b float Yhi = 1.f / (hilite_dir0[0][j1][i1] + hilite_dir0[1][j1][i1] + hilite_dir0[2][j1][i1]); if (Yhi < 2.f) { - float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir0[0][j1][i1] * Yhi) + + const float dirwt = 1.f / ((1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir0[0][j1][i1] * Yhi) + SQR(rgb_blend[1] - hilite_dir0[1][j1][i1] * Yhi) + - SQR(rgb_blend[2] - hilite_dir0[2][j1][i1] * Yhi))); - totwt = dirwt; - dirwt /= (hilite_dir0[3][j1][i1] + epsilon); + SQR(rgb_blend[2] - hilite_dir0[2][j1][i1] * Yhi))) * (hilite_dir0[3][j1][i1] + epsilon)); + totwt = true; clipfix[0] = dirwt * hilite_dir0[0][j1][i1]; clipfix[1] = dirwt * hilite_dir0[1][j1][i1]; clipfix[2] = dirwt * hilite_dir0[2][j1][i1]; } for (int dir = 0; dir < 2; dir++) { - float Yhi = 1.f / ( hilite_dir[dir * 4 + 0][i1][j1] + hilite_dir[dir * 4 + 1][i1][j1] + hilite_dir[dir * 4 + 2][i1][j1]); + const float Yhi = 1.f / ( hilite_dir[dir * 4 + 0][i1][j1] + hilite_dir[dir * 4 + 1][i1][j1] + hilite_dir[dir * 4 + 2][i1][j1]); if (Yhi < 2.f) { - float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir[dir * 4 + 0][i1][j1] * Yhi) + + const float dirwt = 1.f / ((1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir[dir * 4 + 0][i1][j1] * Yhi) + SQR(rgb_blend[1] - hilite_dir[dir * 4 + 1][i1][j1] * Yhi) + - SQR(rgb_blend[2] - hilite_dir[dir * 4 + 2][i1][j1] * Yhi))); - totwt += dirwt; - dirwt /= (hilite_dir[dir * 4 + 3][i1][j1] + epsilon); + SQR(rgb_blend[2] - hilite_dir[dir * 4 + 2][i1][j1] * Yhi))) * (hilite_dir[dir * 4 + 3][i1][j1] + epsilon)); + totwt = true; clipfix[0] += dirwt * hilite_dir[dir * 4 + 0][i1][j1]; clipfix[1] += dirwt * hilite_dir[dir * 4 + 1][i1][j1]; clipfix[2] += dirwt * hilite_dir[dir * 4 + 2][i1][j1]; @@ -1060,56 +1057,51 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b Yhi = 1.f / (hilite_dir4[0][j1][i1] + hilite_dir4[1][j1][i1] + hilite_dir4[2][j1][i1]); if (Yhi < 2.f) { - float dirwt = 1.f / (1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir4[0][j1][i1] * Yhi) + + const float dirwt = 1.f / ((1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir4[0][j1][i1] * Yhi) + SQR(rgb_blend[1] - hilite_dir4[1][j1][i1] * Yhi) + - SQR(rgb_blend[2] - hilite_dir4[2][j1][i1] * Yhi))); - totwt += dirwt; - dirwt /= (hilite_dir4[3][j1][i1] + epsilon); + SQR(rgb_blend[2] - hilite_dir4[2][j1][i1] * Yhi))) * (hilite_dir4[3][j1][i1] + epsilon)); + totwt = true; clipfix[0] += dirwt * hilite_dir4[0][j1][i1]; clipfix[1] += dirwt * hilite_dir4[1][j1][i1]; clipfix[2] += dirwt * hilite_dir4[2][j1][i1]; } - if(totwt == 0.f) { + if (UNLIKELY(!totwt)) { continue; } - clipfix[0] /= totwt; - clipfix[1] /= totwt; - clipfix[2] /= totwt; - //now correct clipped channels if (pixel[0] > max_f[0] && pixel[1] > max_f[1] && pixel[2] > max_f[2]) { //all channels clipped - float Y = (0.299 * clipfix[0] + 0.587 * clipfix[1] + 0.114 * clipfix[2]); + const float Y = 0.299f * clipfix[0] + 0.587f * clipfix[1] + 0.114f * clipfix[2]; - float factor = whitept / Y; + const float factor = whitept / Y; red[i + miny][j + minx] = clipfix[0] * factor; green[i + miny][j + minx] = clipfix[1] * factor; blue[i + miny][j + minx] = clipfix[2] * factor; } else {//some channels clipped - float notclipped[3] = {pixel[0] <= max_f[0] ? 1.f : 0.f, pixel[1] <= max_f[1] ? 1.f : 0.f, pixel[2] <= max_f[2] ? 1.f : 0.f}; + const float notclipped[3] = {pixel[0] <= max_f[0] ? 1.f : 0.f, pixel[1] <= max_f[1] ? 1.f : 0.f, pixel[2] <= max_f[2] ? 1.f : 0.f}; if (notclipped[0] == 0.f) { //red clipped - red[i + miny][j + minx] = max(red[i + miny][j + minx], (clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) / - (notclipped[1] * clipfix[1] + notclipped[2] * clipfix[2] + epsilon)))); + red[i + miny][j + minx] = max(pixel[0], clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) / + (notclipped[1] * clipfix[1] + notclipped[2] * clipfix[2] + epsilon))); } if (notclipped[1] == 0.f) { //green clipped - green[i + miny][j + minx] = max(green[i + miny][j + minx], (clipfix[1] * ((notclipped[2] * pixel[2] + notclipped[0] * pixel[0]) / - (notclipped[2] * clipfix[2] + notclipped[0] * clipfix[0] + epsilon)))); + green[i + miny][j + minx] = max(pixel[1], clipfix[1] * ((notclipped[2] * pixel[2] + notclipped[0] * pixel[0]) / + (notclipped[2] * clipfix[2] + notclipped[0] * clipfix[0] + epsilon))); } if (notclipped[2] == 0.f) { //blue clipped - blue[i + miny][j + minx] = max(blue[i + miny][j + minx], (clipfix[2] * ((notclipped[0] * pixel[0] + notclipped[1] * pixel[1]) / - (notclipped[0] * clipfix[0] + notclipped[1] * clipfix[1] + epsilon)))); + blue[i + miny][j + minx] = max(pixel[2], clipfix[2] * ((notclipped[0] * pixel[0] + notclipped[1] * pixel[1]) / + (notclipped[0] * clipfix[0] + notclipped[1] * clipfix[1] + epsilon))); } } - Y = (0.299 * red[i + miny][j + minx] + 0.587 * green[i + miny][j + minx] + 0.114 * blue[i + miny][j + minx]); + Y = 0.299f * red[i + miny][j + minx] + 0.587f * green[i + miny][j + minx] + 0.114f * blue[i + miny][j + minx]; if (Y > whitept) { - float factor = whitept / Y; + const float factor = whitept / Y; red[i + miny][j + minx] *= factor; green[i + miny][j + minx] *= factor; @@ -1117,8 +1109,9 @@ void RawImageSource :: HLRecovery_inpaint (float** red, float** green, float** b } } } +std::cout << "progress : " << progress << std::endl; - if(plistener) { + if (plistener) { plistener->setProgress(1.00); } From 0b1ba37c759a7417a4fa88f5643f8a5466def6b8 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 9 Jul 2019 19:40:08 +0200 Subject: [PATCH 6/9] color propagation: fix segfault whan there is nothing to reconstruct --- rtengine/hilite_recon.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index 7fe527a78..15077b92a 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -406,6 +406,10 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } + if (minx > maxx || miny > maxy) { // nothing to reconstruct + return; + } + if (plistener) { progress += 0.05; plistener->setProgress(progress); From 3f9c232f18b172367e2e43501a1db18ff24513dc Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 9 Jul 2019 23:15:38 +0200 Subject: [PATCH 7/9] hilite_recon.cc --- rtengine/hilite_recon.cc | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index 15077b92a..a0a8cb83d 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -3,9 +3,11 @@ // Highlight reconstruction // // copyright (c) 2008-2011 Emil Martinec +// copyright (c) 2019 Ingo Weyrich // // // code dated: June 16, 2011 +// code dated: July 09. 2019, speedups by Ingo Weyrich // // hilite_recon.cc is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -424,16 +426,6 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu const int blurHeight = maxy - miny + 1; const int bufferWidth = blurWidth + ((16 - (blurWidth % 16)) & 15); - std::cout << "minx : " << minx << std::endl; - std::cout << "maxx : " << maxx << std::endl; - std::cout << "miny : " << miny << std::endl; - std::cout << "maxy : " << maxy << std::endl; - - std::cout << "blurWidth : " << blurWidth << std::endl; - std::cout << "bufferWidth : " << bufferWidth << std::endl; - - std::cout << "Corrected area reduced by factor: " << (((float)width * height) / (bufferWidth * blurHeight)) << std::endl; - std::cout << "Peak memory usage reduced from ~" << (30ul * ((size_t)width * (size_t)height)) / (1024*1024) << " Mb to ~" << (30ul * ((size_t)bufferWidth * (size_t)blurHeight)) / (1024*1024) << " Mb" << std::endl; multi_array2D channelblur(bufferWidth, blurHeight, 0, 48); array2D temp(bufferWidth, blurHeight); // allocate temporary buffer @@ -929,7 +921,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // now reconstruct clipped channels using color ratios -StopWatch Stop1("last loop"); + #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif @@ -1113,7 +1105,6 @@ StopWatch Stop1("last loop"); } } } -std::cout << "progress : " << progress << std::endl; if (plistener) { plistener->setProgress(1.00); From 856b437983bbf26dea19a2f92989d898488715ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Wed, 10 Jul 2019 13:16:03 +0200 Subject: [PATCH 8/9] Some minor code cleanups --- rtengine/hilite_recon.cc | 423 ++++++++++++++++++++------------------- 1 file changed, 221 insertions(+), 202 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index a0a8cb83d..c6c540e10 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -7,7 +7,7 @@ // // // code dated: June 16, 2011 -// code dated: July 09. 2019, speedups by Ingo Weyrich +// code dated: July 09, 2019, speedups by Ingo Weyrich // // hilite_recon.cc is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -25,17 +25,20 @@ //////////////////////////////////////////////////////////////// #include - -#include #include +#include + #include "array2D.h" +#include "opthelper.h" #include "rawimagesource.h" #include "rt_math.h" -#include "opthelper.h" + #define BENCHMARK #include "StopWatch.h" -namespace { +namespace +{ + void boxblur2(const float* const* src, float** dst, float** temp, int startY, int startX, int H, int W, int bufferW, int box) { constexpr int numCols = 16; @@ -46,27 +49,24 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in #ifdef _OPENMP #pragma omp parallel for #endif - - for (int row = 0; row < H; row++) { + for (int row = 0; row < H; ++row) { int len = box + 1; temp[row][0] = src[row + startY][startX] / len; - for (int j = 1; j <= box; j++) { + for (int j = 1; j <= box; ++j) { temp[row][0] += src[row + startY][j + startX] / len; } - for (int col = 1; col <= box; col++) { + for (int col = 1; col <= box; ++col, ++len) { temp[row][col] = (temp[row][col - 1] * len + src[row + startY][col + box + startX]) / (len + 1); - len ++; } - for (int col = box + 1; col < W - box; col++) { + for (int col = box + 1; col < W - box; ++col) { temp[row][col] = temp[row][col - 1] + (src[row + startY][col + box + startX] - src[row + startY][col - box - 1 + startX]) / len; } - for (int col = W - box; col < W; col++) { + for (int col = W - box; col < W; ++col, --len) { temp[row][col] = (temp[row][col - 1] * len - src[row + startY][col - box - 1 + startX]) / (len - 1); - len --; } } @@ -81,45 +81,50 @@ void boxblur2(const float* const* src, float** dst, float** temp, int startY, in #endif for (int col = 0; col < bufferW - numCols + 1; col += numCols) { float len = box + 1; - for (int n = 0; n < numCols; n++) { + + for (int n = 0; n < numCols; ++n) { tempvalN[n] = temp[0][col + n] / len; } - for (int i = 1; i <= box; i++) { - for (int n = 0; n < numCols; n++) { + + for (int i = 1; i <= box; ++i) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] += temp[i][col + n] / len; } } - for (int n = 0; n < numCols; n++) { + + for (int n = 0; n < numCols; ++n) { dst[0][col + n] = tempvalN[n]; } - for (int row = 1; row <= box; row++) { - for (int n = 0; n < numCols; n++) { + + for (int row = 1; row <= box; ++row, ++len) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1); dst[row][col + n] = tempvalN[n]; } - len ++; - } - const float rlen = 1.f / len; - for (int row = box + 1; row < H - box; row++) { - for (int n = 0; n < numCols; n++) { - tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; - dst[row][col + n] = tempvalN[n]; - } } - for (int row = H - box; row < H; row++) { - for (int n = 0; n < numCols; n++) { + const float rlen = 1.f / len; + + for (int row = box + 1; row < H - box; ++row) { + for (int n = 0; n < numCols; ++n) { + tempvalN[n] += (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; + dst[row][col + n] = tempvalN[n]; + } + } + + for (int row = H - box; row < H; ++row, --len) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] = (dst[(row - 1)][col + n] * len - temp[(row - box - 1)][col + n]) / (len - 1); dst[row][col + n] = tempvalN[n]; } - len --; } } } } -void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, int W, int box, int samp ) +void boxblur_resamp(const float* const* src, float** dst, float** temp, int H, int W, int box, int samp) { + assert(samp != 0); #ifdef _OPENMP #pragma omp parallel @@ -128,33 +133,29 @@ void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, #ifdef _OPENMP #pragma omp for #endif - //box blur image channel; box size = 2*box+1 //horizontal blur - for (int row = 0; row < H; row++) - { + for (int row = 0; row < H; ++row) { int len = box + 1; float tempval = src[row][0] / len; - for (int j = 1; j <= box; j++) { + for (int j = 1; j <= box; ++j) { tempval += src[row][j] / len; } temp[row][0] = tempval; - for (int col = 1; col <= box; col++) { + for (int col = 1; col <= box; ++col, ++len) { tempval = (tempval * len + src[row][col + box]) / (len + 1); if (col % samp == 0) { temp[row][col / samp] = tempval; } - - len ++; } - float oneByLen = 1.f / (float)len; + const float oneByLen = 1.f / static_cast(len); - for (int col = box + 1; col < W - box; col++) { + for (int col = box + 1; col < W - box; ++col) { tempval = tempval + (src[row][col + box] - src[row][col - box - 1]) * oneByLen; if (col % samp == 0) { @@ -162,84 +163,81 @@ void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, } } - for (int col = W - box; col < W; col++) { + for (int col = W - box; col < W; ++col, --len) { tempval = (tempval * len - src[row][col - box - 1]) / (len - 1); if (col % samp == 0) { temp[row][col / samp] = tempval; } - - len --; } } } constexpr int numCols = 8; // process numCols columns at once for better L1 CPU cache usage + #ifdef _OPENMP #pragma omp parallel #endif { float tempvalN[numCols] ALIGNED64; + #ifdef _OPENMP #pragma omp for nowait #endif - //vertical blur for (int col = 0; col < (W / samp) - (numCols - 1); col += numCols) { float len = box + 1; - for (int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] = temp[0][col + n] / len; } - for (int i = 1; i <= box; i++) { - for (int n = 0; n < numCols; n++) { + for (int i = 1; i <= box; ++i) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] += temp[i][col + n] / len; } } - for (int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; ++n) { dst[0][col + n] = tempvalN[n]; } - for (int row = 1; row <= box; row++) { - for (int n = 0; n < numCols; n++) { + for (int row = 1; row <= box; ++row, ++len) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] = (tempvalN[n] * len + temp[(row + box)][col + n]) / (len + 1); } if (row % samp == 0) { - for (int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; ++n) { dst[row / samp][col + n] = tempvalN[n]; } } - - len ++; } + const float rlen = 1.f / len; - for (int row = box + 1; row < H - box; row++) { - for (int n = 0; n < numCols; n++) { - tempvalN[n] = tempvalN[n] + (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; + + for (int row = box + 1; row < H - box; ++row) { + for (int n = 0; n < numCols; ++n) { + tempvalN[n] += (temp[(row + box)][col + n] - temp[(row - box - 1)][col + n]) * rlen; } if (row % samp == 0) { - for (int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; ++n) { dst[row / samp][col + n] = tempvalN[n]; } } } - for (int row = H - box; row < H; row++) { - for (int n = 0; n < numCols; n++) { + for (int row = H - box; row < H; ++row, --len) { + for (int n = 0; n < numCols; ++n) { tempvalN[n] = (tempvalN[n] * len - temp[(row - box - 1)][col + n]) / (len - 1); } if (row % samp == 0) { - for (int n = 0; n < numCols; n++) { + for (int n = 0; n < numCols; ++n) { dst[row / samp][col + n] = tempvalN[n]; } } - - len --; } } @@ -250,42 +248,38 @@ void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, { //vertical blur - for (int col = (W / samp) - ((W / samp) % numCols); col < W / samp; col++) { + for (int col = (W / samp) - ((W / samp) % numCols); col < W / samp; ++col) { int len = box + 1; float tempval = temp[0][col] / len; - for (int i = 1; i <= box; i++) { + for (int i = 1; i <= box; ++i) { tempval += temp[i][col] / len; } dst[0][col] = tempval; - for (int row = 1; row <= box; row++) { + for (int row = 1; row <= box; ++row, ++len) { tempval = (tempval * len + temp[(row + box)][col]) / (len + 1); if (row % samp == 0) { dst[row / samp][col] = tempval; } - - len ++; } - for (int row = box + 1; row < H - box; row++) { - tempval = tempval + (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len; + for (int row = box + 1; row < H - box; ++row) { + tempval += (temp[(row + box)][col] - temp[(row - box - 1)][col]) / len; if (row % samp == 0) { dst[row / samp][col] = tempval; } } - for (int row = H - box; row < H; row++) { + for (int row = H - box; row < H; ++row, --len) { tempval = (tempval * len - temp[(row - box - 1)][col]) / (len - 1); if (row % samp == 0) { dst[row / samp][col] = tempval; } - - len --; } } } @@ -293,20 +287,20 @@ void boxblur_resamp(const float * const *src, float **dst, float ** temp, int H, } } + namespace rtengine { extern const Settings* settings; - -void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blue) +void RawImageSource::HLRecovery_inpaint(float** red, float** green, float** blue) { BENCHFUN double progress = 0.0; if (plistener) { - plistener->setProgressStr ("PROGRESSBAR_HLREC"); - plistener->setProgress (progress); + plistener->setProgressStr("PROGRESSBAR_HLREC"); + plistener->setProgress(progress); } const int height = H; @@ -321,29 +315,35 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu //%%%%%%%%%%%%%%%%%%%% //for blend algorithm: constexpr float blendthresh = 1.0; - constexpr int ColorCount = 3; // Transform matrixes rgb>lab and back - constexpr float trans[ColorCount][ColorCount] = - { { 1.f, 1.f, 1.f }, { 1.7320508f, -1.7320508f, 0.f }, { -1.f, -1.f, 2.f } }; - constexpr float itrans[ColorCount][ColorCount] = - { { 1.f, 0.8660254f, -0.5f }, { 1.f, -0.8660254f, -0.5f }, { 1.f, 0.f, 1.f } }; + constexpr float trans[3][3] = { + {1.f, 1.f, 1.f}, + {1.7320508f, -1.7320508f, 0.f}, + {-1.f, -1.f, 2.f} + }; + constexpr float itrans[3][3] = { + {1.f, 0.8660254f, -0.5f}, + {1.f, -0.8660254f, -0.5f}, + {1.f, 0.f, 1.f} + }; - if (settings->verbose) - for (int c = 0; c < 3; c++) { + if (settings->verbose) { + for (int c = 0; c < 3; ++c) { printf("chmax[%d] : %f\tclmax[%d] : %f\tratio[%d] : %f\n", c, chmax[c], c, clmax[c], c, chmax[c] / clmax[c]); } + } float factor[3]; - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { factor[c] = chmax[c] / clmax[c]; } - float minFactor = min(factor[0], factor[1], factor[2]); + const float minFactor = min(factor[0], factor[1], factor[2]); if (minFactor > 1.f) { // all 3 channels clipped // calculate clip factor per channel - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { factor[c] /= minFactor; } @@ -351,7 +351,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu int maxpos = 0; float maxValNew = 0.f; - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { if (chmax[c] / factor[c] > maxValNew) { maxValNew = chmax[c] / factor[c]; maxpos = c; @@ -360,24 +360,26 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu const float clipFactor = clmax[maxpos] / maxValNew; - if (clipFactor < maxpct) - + if (clipFactor < maxpct) { // if max clipFactor < maxpct (0.95) adjust per channel factors - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { factor[c] *= (maxpct / clipFactor); } + } } else { factor[0] = factor[1] = factor[2] = 1.f; } - if (settings->verbose) - for (int c = 0; c < ColorCount; c++) { + if (settings->verbose) { + for (int c = 0; c < 3; ++c) { printf("correction factor[%d] : %f\n", c, factor[c]); } + } - float max_f[3], thresh[3]; + float max_f[3]; + float thresh[3]; - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { thresh[c] = chmax[c] * threshpct / factor[c]; max_f[c] = chmax[c] * maxpct / factor[c]; } @@ -386,11 +388,13 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu const float clippt = min(max_f[0], max_f[1], max_f[2]); const float medpt = max_f[0] + max_f[1] + max_f[2] - whitept - clippt; const float blendpt = blendthresh * clippt; + float medFactor[3]; - for (int c = 0; c < ColorCount; c++) { - medFactor[c] = max(1.0f, max_f[c] / medpt) / (-blendpt); + for (int c = 0; c < 3; ++c) { + medFactor[c] = max(1.0f, max_f[c] / medpt) / -blendpt; } + int minx = width - 1; int maxx = 0; int miny = height - 1; @@ -456,13 +460,13 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp parallel for #endif - - for (int i = 0; i < blurHeight; i++) - for (int j = 0; j < blurWidth; j++) { + for (int i = 0; i < blurHeight; ++i) { + for (int j = 0; j < blurWidth; ++j) { channelblur[0][i][j] = fabsf(channelblur[0][i][j] - red[i + miny][j + minx]) + fabsf(channelblur[1][i][j] - green[i + miny][j + minx]) + fabsf(channelblur[2][i][j] - blue[i + miny][j + minx]); } + } - for (int c = 1; c < 3; c++) { + for (int c = 1; c < 3; ++c) { channelblur[c].free(); //free up some memory } @@ -478,31 +482,36 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu plistener->setProgress(progress); } - double hipass_sum = 0.f; + double hipass_sum = 0.0; int hipass_norm = 0; // set up which pixels are clipped or near clipping #ifdef _OPENMP #pragma omp parallel for reduction(+:hipass_sum,hipass_norm) schedule(dynamic,16) #endif - - for (int i = 0; i < blurHeight; i++) { - for (int j = 0; j < blurWidth; j++) { - //if one or more channels is highlight but none are blown, add to highlight accumulator - if ((red[i + miny][j + minx] > thresh[0] || green[i + miny][j + minx] > thresh[1] || blue[i + miny][j + minx] > thresh[2]) && - (red[i + miny][j + minx] < max_f[0] && green[i + miny][j + minx] < max_f[1] && blue[i + miny][j + minx] < max_f[2])) { - + for (int i = 0; i < blurHeight; ++i) { + for (int j = 0; j < blurWidth; ++j) { + if ( + ( + red[i + miny][j + minx] > thresh[0] + || green[i + miny][j + minx] > thresh[1] + || blue[i + miny][j + minx] > thresh[2] + ) + && red[i + miny][j + minx] < max_f[0] + && green[i + miny][j + minx] < max_f[1] + && blue[i + miny][j + minx] < max_f[2] + ) { + // if one or more channels is highlight but none are blown, add to highlight accumulator hipass_sum += channelblur[0][i][j]; - hipass_norm ++; + ++hipass_norm; hilite_full[0][i][j] = red[i + miny][j + minx]; hilite_full[1][i][j] = green[i + miny][j + minx]; hilite_full[2][i][j] = blue[i + miny][j + minx]; hilite_full[3][i][j] = 1.f; - } } - }//end of filling highlight array + } const float hipass_ave = 2.f * hipass_sum / (hipass_norm + epsilon); @@ -526,9 +535,8 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif - - for (int i = 0; i < blurHeight; i++) { - for (int j = 0; j < blurWidth; j++) { + for (int i = 0; i < blurHeight; ++i) { + for (int j = 0; j < blurWidth; ++j) { if (channelblur[0][i][j] > hipass_ave) { //too much variation hilite_full[0][i][j] = hilite_full[1][i][j] = hilite_full[2][i][j] = hilite_full[3][i][j] = 0.f; @@ -545,17 +553,17 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu channelblur[0].free(); //free up some memory hilite_full4.free(); //free up some memory - int hfh = (blurHeight - (blurHeight % pitch)) / pitch; - int hfw = (blurWidth - (blurWidth % pitch)) / pitch; + const int hfh = (blurHeight - blurHeight % pitch) / pitch; + const int hfw = (blurWidth - blurWidth % pitch) / pitch; multi_array2D hilite(hfw + 1, hfh + 1, ARRAY2D_CLEAR_DATA, 48); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // blur and resample highlight data; range=size of blur, pitch=sample spacing - array2D temp2((blurWidth / pitch) + ((blurWidth % pitch) == 0 ? 0 : 1), blurHeight); + array2D temp2(blurWidth / pitch + (blurWidth % pitch == 0 ? 0 : 1), blurHeight); - for (int m = 0; m < 4; m++) { + for (int m = 0; m < 4; ++m) { boxblur_resamp(hilite_full[m], hilite[m], temp2, blurHeight, blurWidth, range, pitch); if (plistener) { @@ -566,7 +574,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu temp2.free(); - for (int c = 0; c < 4; c++) { + for (int c = 0; c < 4; ++c) { hilite_full[c].free(); //free up some memory } @@ -582,8 +590,8 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu //fill gaps in highlight map by directional extension //raster scan from four corners - for (int j = 1; j < hfw - 1; j++) { - for (int i = 2; i < hfh - 2; i++) { + for (int j = 1; j < hfw - 1; ++j) { + for (int i = 2; i < hfh - 2; ++i) { //from left if (hilite[3][i][j] > epsilon) { hilite_dir0[3][j][i] = 1.f; @@ -609,7 +617,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int i = 2; i < hfh - 2; i++) { + for (int i = 2; i < hfh - 2; ++i) { if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir4[3][hfw - 1][i] = hilite_dir0[3][hfw - 2][i]; } @@ -622,10 +630,9 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp for nowait #endif - - for (int c = 0; c < 3; c++) { - for (int j = 1; j < hfw - 1; j++) { - for (int i = 2; i < hfh - 2; i++) { + for (int c = 0; c < 3; ++c) { + for (int j = 1; j < hfw - 1; ++j) { + for (int i = 2; i < hfh - 2; ++i) { //from left if (hilite[3][i][j] > epsilon) { hilite_dir0[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; @@ -652,7 +659,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int i = 2; i < hfh - 2; i++) { + for (int i = 2; i < hfh - 2; ++i) { if (hilite[3][i][hfw - 2] <= epsilon) { hilite_dir4[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i]; } @@ -663,8 +670,8 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #pragma omp single #endif { - for (int j = hfw - 2; j > 0; j--) { - for (int i = 2; i < hfh - 2; i++) { + for (int j = hfw - 2; j > 0; --j) { + for (int i = 2; i < hfh - 2; ++i) { //from right if (hilite[3][i][j] > epsilon) { hilite_dir4[3][j][i] = 1.f; @@ -682,7 +689,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int i = 2; i < hfh - 2; i++) { + for (int i = 2; i < hfh - 2; ++i) { if (hilite[3][i][0] <= epsilon) { hilite_dir[0 + 3][i - 2][0] += hilite_dir4[3][0][i]; hilite_dir[4 + 3][i + 2][0] += hilite_dir4[3][0][i]; @@ -712,10 +719,9 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp for nowait #endif - - for (int c = 0; c < 3; c++) { - for (int j = hfw - 2; j > 0; j--) { - for (int i = 2; i < hfh - 2; i++) { + for (int c = 0; c < 3; ++c) { + for (int j = hfw - 2; j > 0; --j) { + for (int i = 2; i < hfh - 2; ++i) { //from right if (hilite[3][i][j] > epsilon) { hilite_dir4[c][j][i] = hilite[c][i][j] / hilite[3][i][j]; @@ -734,7 +740,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int i = 2; i < hfh - 2; i++) { + for (int i = 2; i < hfh - 2; ++i) { if (hilite[3][i][0] <= epsilon) { hilite_dir[0 + c][i - 2][0] += hilite_dir4[c][0][i]; hilite_dir[4 + c][i + 2][0] += hilite_dir4[c][0][i]; @@ -756,8 +762,8 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #pragma omp single #endif { - for (int i = 1; i < hfh - 1; i++) - for (int j = 2; j < hfw - 2; j++) { + for (int i = 1; i < hfh - 1; ++i) + for (int j = 2; j < hfw - 2; ++j) { //from top if (hilite[3][i][j] > epsilon) { hilite_dir[0 + 3][i][j] = 1.f; @@ -766,7 +772,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int j = 2; j < hfw - 2; j++) { + for (int j = 2; j < hfw - 2; ++j) { if (hilite[3][hfh - 2][j] <= epsilon) { hilite_dir[4 + 3][hfh - 1][j] += hilite_dir[0 + 3][hfh - 2][j]; } @@ -785,10 +791,9 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp for nowait #endif - - for (int c = 0; c < 3; c++) { - for (int i = 1; i < hfh - 1; i++) { - for (int j = 2; j < hfw - 2; j++) { + for (int c = 0; c < 3; ++c) { + for (int i = 1; i < hfh - 1; ++i) { + for (int j = 2; j < hfw - 2; ++j) { //from top if (hilite[3][i][j] > epsilon) { hilite_dir[0 + c][i][j] = hilite[c][i][j] / hilite[3][i][j]; @@ -799,7 +804,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int j = 2; j < hfw - 2; j++) { + for (int j = 2; j < hfw - 2; ++j) { if (hilite[3][hfh - 2][j] <= epsilon) { hilite_dir[4 + c][hfh - 1][j] += hilite_dir[0 + c][hfh - 2][j]; } @@ -810,8 +815,8 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp single #endif - for (int i = hfh - 2; i > 0; i--) - for (int j = 2; j < hfw - 2; j++) { + for (int i = hfh - 2; i > 0; --i) { + for (int j = 2; j < hfw - 2; ++j) { //from bottom if (hilite[3][i][j] > epsilon) { hilite_dir[4 + 3][i][j] = 1.f; @@ -820,6 +825,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } } + } if (plistener) { progress += 0.05; @@ -829,10 +835,9 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp parallel for #endif - - for (int c = 0; c < 4; c++) { - for (int i = hfh - 2; i > 0; i--) { - for (int j = 2; j < hfw - 2; j++) { + for (int c = 0; c < 4; ++c) { + for (int i = hfh - 2; i > 0; --i) { + for (int j = 2; j < hfw - 2; ++j) { //from bottom if (hilite[3][i][j] > epsilon) { hilite_dir[4 + c][i][j] = hilite[c][i][j] / hilite[3][i][j]; @@ -850,20 +855,22 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } //fill in edges - for (int dir = 0; dir < 2; dir++) { - for (int i = 1; i < hfh - 1; i++) - for (int c = 0; c < 4; c++) { + for (int dir = 0; dir < 2; ++dir) { + for (int i = 1; i < hfh - 1; ++i) { + for (int c = 0; c < 4; ++c) { hilite_dir[dir * 4 + c][i][0] = hilite_dir[dir * 4 + c][i][1]; hilite_dir[dir * 4 + c][i][hfw - 1] = hilite_dir[dir * 4 + c][i][hfw - 2]; } + } - for (int j = 1; j < hfw - 1; j++) - for (int c = 0; c < 4; c++) { + for (int j = 1; j < hfw - 1; ++j) { + for (int c = 0; c < 4; ++c) { hilite_dir[dir * 4 + c][0][j] = hilite_dir[dir * 4 + c][1][j]; hilite_dir[dir * 4 + c][hfh - 1][j] = hilite_dir[dir * 4 + c][hfh - 2][j]; } + } - for (int c = 0; c < 4; c++) { + for (int c = 0; c < 4; ++c) { hilite_dir[dir * 4 + c][0][0] = hilite_dir[dir * 4 + c][1][0] = hilite_dir[dir * 4 + c][0][1] = hilite_dir[dir * 4 + c][1][1] = hilite_dir[dir * 4 + c][2][2]; hilite_dir[dir * 4 + c][0][hfw - 1] = hilite_dir[dir * 4 + c][1][hfw - 1] = hilite_dir[dir * 4 + c][0][hfw - 2] = hilite_dir[dir * 4 + c][1][hfw - 2] = hilite_dir[dir * 4 + c][2][hfw - 3]; hilite_dir[dir * 4 + c][hfh - 1][0] = hilite_dir[dir * 4 + c][hfh - 2][0] = hilite_dir[dir * 4 + c][hfh - 1][1] = hilite_dir[dir * 4 + c][hfh - 2][1] = hilite_dir[dir * 4 + c][hfh - 3][2]; @@ -871,38 +878,42 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } } - for (int i = 1; i < hfh - 1; i++) - for (int c = 0; c < 4; c++) { + for (int i = 1; i < hfh - 1; ++i) { + for (int c = 0; c < 4; ++c) { hilite_dir0[c][0][i] = hilite_dir0[c][1][i]; hilite_dir0[c][hfw - 1][i] = hilite_dir0[c][hfw - 2][i]; } + } - for (int j = 1; j < hfw - 1; j++) - for (int c = 0; c < 4; c++) { + for (int j = 1; j < hfw - 1; ++j) { + for (int c = 0; c < 4; ++c) { hilite_dir0[c][j][0] = hilite_dir0[c][j][1]; hilite_dir0[c][j][hfh - 1] = hilite_dir0[c][j][hfh - 2]; } + } - for (int c = 0; c < 4; c++) { + for (int c = 0; c < 4; ++c) { hilite_dir0[c][0][0] = hilite_dir0[c][0][1] = hilite_dir0[c][1][0] = hilite_dir0[c][1][1] = hilite_dir0[c][2][2]; hilite_dir0[c][hfw - 1][0] = hilite_dir0[c][hfw - 1][1] = hilite_dir0[c][hfw - 2][0] = hilite_dir0[c][hfw - 2][1] = hilite_dir0[c][hfw - 3][2]; hilite_dir0[c][0][hfh - 1] = hilite_dir0[c][0][hfh - 2] = hilite_dir0[c][1][hfh - 1] = hilite_dir0[c][1][hfh - 2] = hilite_dir0[c][2][hfh - 3]; hilite_dir0[c][hfw - 1][hfh - 1] = hilite_dir0[c][hfw - 1][hfh - 2] = hilite_dir0[c][hfw - 2][hfh - 1] = hilite_dir0[c][hfw - 2][hfh - 2] = hilite_dir0[c][hfw - 3][hfh - 3]; } - for (int i = 1; i < hfh - 1; i++) - for (int c = 0; c < 4; c++) { + for (int i = 1; i < hfh - 1; ++i) { + for (int c = 0; c < 4; ++c) { hilite_dir4[c][0][i] = hilite_dir4[c][1][i]; hilite_dir4[c][hfw - 1][i] = hilite_dir4[c][hfw - 2][i]; } + } - for (int j = 1; j < hfw - 1; j++) - for (int c = 0; c < 4; c++) { + for (int j = 1; j < hfw - 1; ++j) { + for (int c = 0; c < 4; ++c) { hilite_dir4[c][j][0] = hilite_dir4[c][j][1]; hilite_dir4[c][j][hfh - 1] = hilite_dir4[c][j][hfh - 2]; } + } - for (int c = 0; c < 4; c++) { + for (int c = 0; c < 4; ++c) { hilite_dir4[c][0][0] = hilite_dir4[c][0][1] = hilite_dir4[c][1][0] = hilite_dir4[c][1][1] = hilite_dir4[c][2][2]; hilite_dir4[c][hfw - 1][0] = hilite_dir4[c][hfw - 1][1] = hilite_dir4[c][hfw - 2][0] = hilite_dir4[c][hfw - 2][1] = hilite_dir4[c][hfw - 3][2]; hilite_dir4[c][0][hfh - 1] = hilite_dir4[c][0][hfh - 2] = hilite_dir4[c][1][hfh - 1] = hilite_dir4[c][1][hfh - 2] = hilite_dir4[c][2][hfh - 3]; @@ -915,7 +926,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu } //free up some memory - for (int c = 0; c < 4; c++) { + for (int c = 0; c < 4; ++c) { hilite[c].free(); } @@ -925,47 +936,52 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif + for (int i = 0; i < blurHeight; ++i) { + const int i1 = min((i - i % pitch) / pitch, hfh - 1); - for (int i = 0; i < blurHeight; i++) { - const int i1 = min((i - (i % pitch)) / pitch, hfh - 1); - - for (int j = 0; j < blurWidth; j++) { - - const float pixel[3] = {red[i + miny][j + minx], green[i + miny][j + minx], blue[i + miny][j + minx]}; + for (int j = 0; j < blurWidth; ++j) { + const float pixel[3] = { + red[i + miny][j + minx], + green[i + miny][j + minx], + blue[i + miny][j + minx] + }; if (pixel[0] < max_f[0] && pixel[1] < max_f[1] && pixel[2] < max_f[2]) { continue; //pixel not clipped } - const int j1 = min((j - (j % pitch)) / pitch, hfw - 1); + const int j1 = min((j - j % pitch) / pitch, hfw - 1); //estimate recovered values using modified HLRecovery_blend algorithm - float rgb[ColorCount], rgb_blend[ColorCount] = {}, cam[2][ColorCount], lab[2][ColorCount], sum[2]; - - // Copy input pixel to rgb so it's easier to access in loops - rgb[0] = pixel[0]; - rgb[1] = pixel[1]; - rgb[2] = pixel[2]; + float rgb[3] = { + pixel[0], + pixel[1], + pixel[2] + };// Copy input pixel to rgb so it's easier to access in loops + float rgb_blend[3] = {}; + float cam[2][3]; + float lab[2][3]; + float sum[2]; // Initialize cam with raw input [0] and potentially clipped input [1] - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { cam[0][c] = rgb[c]; cam[1][c] = min(cam[0][c], clippt); } // Calculate the lightness correction ratio (chratio) - for (int i2 = 0; i2 < 2; i2++) { - for (int c = 0; c < ColorCount; c++) { + for (int i2 = 0; i2 < 2; ++i2) { + for (int c = 0; c < 3; ++c) { lab[i2][c] = 0; - for (int j = 0; j < ColorCount; j++) { + for (int j = 0; j < 3; ++j) { lab[i2][c] += trans[c][j] * cam[i2][j]; } } sum[i2] = 0.f; - for (int c = 1; c < ColorCount; c++) { + for (int c = 1; c < 3; ++c) { sum[i2] += SQR(lab[i2][c]); } } @@ -976,36 +992,36 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu const float chratio = sqrtf(sum[1] / sum[0]); // Apply ratio to lightness in lab space - for (int c = 1; c < ColorCount; c++) { + for (int c = 1; c < 3; ++c) { lab[0][c] *= chratio; } // Transform back from lab to RGB - for (int c = 0; c < ColorCount; c++) { - cam[0][c] = 0; + for (int c = 0; c < 3; ++c) { + cam[0][c] = 0.f; - for (int j = 0; j < ColorCount; j++) { + for (int j = 0; j < 3; ++j) { cam[0][c] += itrans[c][j] * lab[0][j]; } } - for (int c = 0; c < ColorCount; c++) { - rgb[c] = cam[0][c] / ColorCount; + for (int c = 0; c < 3; ++c) { + rgb[c] = cam[0][c] / 3; } // Copy converted pixel back if (pixel[0] > blendpt) { - const float rfrac = max(0.f, min(1.f, medFactor[0] * (pixel[0] - blendpt))); + const float rfrac = LIM01(medFactor[0] * (pixel[0] - blendpt)); rgb_blend[0] = rfrac * rgb[0] + (1.f - rfrac) * pixel[0]; } if (pixel[1] > blendpt) { - const float gfrac = max(0.f, min(1.f, medFactor[1] * (pixel[1] - blendpt))); + const float gfrac = LIM01(medFactor[1] * (pixel[1] - blendpt)); rgb_blend[1] = gfrac * rgb[1] + (1.f - gfrac) * pixel[1]; } if (pixel[2] > blendpt) { - const float bfrac = max(0.f, min(1.f, medFactor[2] * (pixel[2] - blendpt))); + const float bfrac = LIM01(medFactor[2] * (pixel[2] - blendpt)); rgb_blend[2] = bfrac * rgb[2] + (1.f - bfrac) * pixel[2]; } @@ -1019,7 +1035,7 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu float Y = epsilon + rgb_blend[0] + rgb_blend[1] + rgb_blend[2]; - for (int c = 0; c < ColorCount; c++) { + for (int c = 0; c < 3; ++c) { rgb_blend[c] /= Y; } @@ -1035,13 +1051,13 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu clipfix[2] = dirwt * hilite_dir0[2][j1][i1]; } - for (int dir = 0; dir < 2; dir++) { - const float Yhi = 1.f / ( hilite_dir[dir * 4 + 0][i1][j1] + hilite_dir[dir * 4 + 1][i1][j1] + hilite_dir[dir * 4 + 2][i1][j1]); + for (int dir = 0; dir < 2; ++dir) { + const float Yhi2 = 1.f / ( hilite_dir[dir * 4 + 0][i1][j1] + hilite_dir[dir * 4 + 1][i1][j1] + hilite_dir[dir * 4 + 2][i1][j1]); - if (Yhi < 2.f) { - const float dirwt = 1.f / ((1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir[dir * 4 + 0][i1][j1] * Yhi) + - SQR(rgb_blend[1] - hilite_dir[dir * 4 + 1][i1][j1] * Yhi) + - SQR(rgb_blend[2] - hilite_dir[dir * 4 + 2][i1][j1] * Yhi))) * (hilite_dir[dir * 4 + 3][i1][j1] + epsilon)); + if (Yhi2 < 2.f) { + const float dirwt = 1.f / ((1.f + 65535.f * (SQR(rgb_blend[0] - hilite_dir[dir * 4 + 0][i1][j1] * Yhi2) + + SQR(rgb_blend[1] - hilite_dir[dir * 4 + 1][i1][j1] * Yhi2) + + SQR(rgb_blend[2] - hilite_dir[dir * 4 + 2][i1][j1] * Yhi2))) * (hilite_dir[dir * 4 + 3][i1][j1] + epsilon)); totwt = true; clipfix[0] += dirwt * hilite_dir[dir * 4 + 0][i1][j1]; clipfix[1] += dirwt * hilite_dir[dir * 4 + 1][i1][j1]; @@ -1076,7 +1092,11 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu green[i + miny][j + minx] = clipfix[1] * factor; blue[i + miny][j + minx] = clipfix[2] * factor; } else {//some channels clipped - const float notclipped[3] = {pixel[0] <= max_f[0] ? 1.f : 0.f, pixel[1] <= max_f[1] ? 1.f : 0.f, pixel[2] <= max_f[2] ? 1.f : 0.f}; + const float notclipped[3] = { + pixel[0] <= max_f[0] ? 1.f : 0.f, + pixel[1] <= max_f[1] ? 1.f : 0.f, + pixel[2] <= max_f[2] ? 1.f : 0.f + }; if (notclipped[0] == 0.f) { //red clipped red[i + miny][j + minx] = max(pixel[0], clipfix[0] * ((notclipped[1] * pixel[1] + notclipped[2] * pixel[2]) / @@ -1112,6 +1132,5 @@ void RawImageSource::HLRecovery_inpaint (float** red, float** green, float** blu }// end of HLReconstruction - } From d1c9a5f989629700f3fefa8aeb872f3cafbb54cc Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Fri, 12 Jul 2019 13:46:45 +0200 Subject: [PATCH 9/9] Removed timing code --- rtengine/hilite_recon.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index c6c540e10..b0a7e6229 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -33,9 +33,6 @@ #include "rawimagesource.h" #include "rt_math.h" -#define BENCHMARK -#include "StopWatch.h" - namespace { @@ -295,7 +292,6 @@ extern const Settings* settings; void RawImageSource::HLRecovery_inpaint(float** red, float** green, float** blue) { - BENCHFUN double progress = 0.0; if (plistener) {