diff --git a/rtengine/boxblur.h b/rtengine/boxblur.h index b25d3e4b7..dbfc0ca3a 100644 --- a/rtengine/boxblur.h +++ b/rtengine/boxblur.h @@ -129,6 +129,94 @@ template void boxblur (T** src, A** dst, int radx, int rady, i } +template void boxblurnew (T** src, A** dst, T* buffer, int radx, int rady, int W, int H) +{ + + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + //box blur image; box range = (radx,rady) + + float* temp = buffer; + + if (radx == 0) { +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 0; row < H; row++) + for (int col = 0; col < W; col++) { + temp[row * W + col] = (float)src[row][col]; + } + } else { + //horizontal blur +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 0; row < H; row++) { + int len = radx + 1; + temp[row * W + 0] = (float)src[row][0] / len; + + for (int j = 1; j <= radx; j++) { + temp[row * W + 0] += (float)src[row][j] / len; + } + + for (int col = 1; col <= radx; col++) { + temp[row * W + col] = (temp[row * W + col - 1] * len + (float)src[row][col + radx]) / (len + 1); + len ++; + } + + for (int col = radx + 1; col < W - radx; col++) { + temp[row * W + col] = temp[row * W + col - 1] + ((float)(src[row][col + radx] - src[row][col - radx - 1])) / len; + } + + for (int col = W - radx; col < W; col++) { + temp[row * W + col] = (temp[row * W + col - 1] * len - src[row][col - radx - 1]) / (len - 1); + len --; + } + } + } + + if (rady == 0) { +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 0; row < H; row++) + for (int col = 0; col < W; col++) { + dst[row][col] = temp[row * W + col]; + } + } else { + //vertical blur +#ifdef _OPENMP + #pragma omp for +#endif + + for (int col = 0; col < W; col++) { + int len = rady + 1; + dst[0][col] = temp[0 * W + col] / len; + + for (int i = 1; i <= rady; i++) { + dst[0][col] += temp[i * W + col] / len; + } + + for (int row = 1; row <= rady; row++) { + dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + rady) * W + col]) / (len + 1); + len ++; + } + + for (int row = rady + 1; row < H - rady; row++) { + dst[row][col] = dst[(row - 1)][col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len; + } + + for (int row = H - rady; row < H; row++) { + dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - rady - 1) * W + col]) / (len - 1); + len --; + } + } + } + +} + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/rtengine/gauss.h b/rtengine/gauss.h index 5ee144a6a..d5038da72 100644 --- a/rtengine/gauss.h +++ b/rtengine/gauss.h @@ -24,6 +24,7 @@ #include #include "opthelper.h" #include "stdio.h" +#include "boxblur.h" // classical filtering if the support window is small: template void gaussHorizontal3 (T** src, T** dst, int W, int H, const float c0, const float c1) @@ -74,8 +75,8 @@ template void gaussVertical3 (T** src, T** dst, int W, int H, const flo #ifdef __SSE2__ template SSEFUNCTION void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1) { - __m128 Tv, Tm1v, Tp1v; - __m128 c0v, c1v; + vfloat Tv, Tm1v, Tp1v; + vfloat c0v, c1v; c0v = F2V(c0); c1v = F2V(c1); #ifdef _OPENMP @@ -121,8 +122,8 @@ template SSEFUNCTION void gaussHorizontal3Sse (T** src, T** dst, int W, { float tmp[W][4] ALIGNED16; - __m128 Tv, Tm1v, Tp1v; - __m128 c0v, c1v; + vfloat Tv, Tm1v, Tp1v; + vfloat c0v, c1v; c0v = F2V(c0); c1v = F2V(c1); #ifdef _OPENMP @@ -240,12 +241,12 @@ template SSEFUNCTION void gaussHorizontalSse (T** src, T** dst, int W, M[i][j] /= (1.0 + b1 - b2 + b3) * (1.0 - b1 - b2 - b3); } + vfloat Rv; + vfloat Tv, Tm2v, Tm3v; + vfloat Bv, b1v, b2v, b3v; + vfloat temp2W, temp2Wp1; float tmp[W][4] ALIGNED16; float tmpV[4] ALIGNED16; - __m128 Rv; - __m128 Tv, Tm2v, Tm3v; - __m128 Bv, b1v, b2v, b3v; - __m128 temp2W, temp2Wp1; Bv = F2V(B); b1v = F2V(b1); b2v = F2V(b2); @@ -527,10 +528,10 @@ template SSEFUNCTION void gaussVerticalSse (T** src, T** dst, int W, in } float tmp[H][4] ALIGNED16; - __m128 Rv; - __m128 Tv, Tm2v, Tm3v; - __m128 Bv, b1v, b2v, b3v; - __m128 temp2W, temp2Wp1; + vfloat Rv; + vfloat Tv, Tm2v, Tm3v; + vfloat Bv, b1v, b2v, b3v; + vfloat temp2W, temp2Wp1; Bv = F2V(B); b1v = F2V(b1); b2v = F2V(b2); @@ -761,36 +762,47 @@ template void gaussVertical (T** src, T** dst, int W, int H, double sig } } -template void gaussianBlur(T** src, T** dst, const int W, const int H, const double sigma, bool forceLowSigma = false) +template void gaussianBlur(T** src, T** dst, const int W, const int H, const double sigma, T *buffer = NULL) { - double newSigma = sigma; - if(forceLowSigma && newSigma > 170.f) { - newSigma /= sqrt(2.0); - - if(newSigma < 0.6) { // barrier to avoid using simple gauss version for higher radius - newSigma = sigma; - forceLowSigma = false; - gaussianBlur(src,dst,W,H,newSigma,forceLowSigma); - } else { - gaussianBlur(src,dst,W,H,newSigma,forceLowSigma); - gaussianBlur(dst,dst,W,H,newSigma,forceLowSigma); + if(buffer) { // use iterated boxblur to approximate gaussian blur + // Compute ideal averaging filter width and number of iterations + int n = 1; + double wIdeal = sqrt((12*sigma*sigma)+1); + while(wIdeal >= (W/2-1) || wIdeal >= (H/2-1)) { + n++; + wIdeal = sqrt((12*sigma*sigma/n)+1); } + + if(n<3) { + n = 3; + wIdeal = sqrt((12*sigma*sigma/n)+1); + } else if(n>6) + n=6; + + int wl = wIdeal; + if(wl%2==0) wl--; + int wu = wl+2; + + double mIdeal = (12*sigma*sigma - n*wl*wl - 4*n*wl - 3*n)/(-4*wl - 4); + int m = round(mIdeal); + + int sizes[n]; + for(int i=0; i (src, dst, W, H, newSigma); - gaussVertical (dst, dst, W, H, newSigma); + gaussHorizontal (src, dst, W, H, sigma); + gaussVertical (dst, dst, W, H, sigma); } -// #pragma omp critical -// printf("gauss sigma : %f / %f\n",sigma,newSigma); - -/* - if(forceLowSigma && newSigma > 170.f) { - gaussianBlur(dst,dst,W,H,newSigma,forceLowSigma); -// gaussHorizontal (dst, dst, W, H, newSigma); -// gaussVertical (dst, dst, W, H, newSigma); - } -*/ } #endif diff --git a/rtengine/ipretinex.cc b/rtengine/ipretinex.cc index a7901fb71..e8eef4b27 100644 --- a/rtengine/ipretinex.cc +++ b/rtengine/ipretinex.cc @@ -311,16 +311,17 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e const float logBetaGain = xlogf(16384.f); const float pond = logBetaGain / (float) scal; + float *buffer = new float[W_L*H_L];; #ifdef _OPENMP #pragma omp parallel #endif { for ( int scale = scal - 1; scale >= 0; scale-- ) { - if(scale == scal - 1) { // probably large sigma. Use double gauss with sigma divided by sqrt(2.0) - gaussianBlur (src, out, W_L, H_L, RetinexScales[scale], true); + if(scale == scal - 1) { + gaussianBlur (src, out, W_L, H_L, RetinexScales[scale], buffer); } else { // reuse result of last iteration - gaussianBlur (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1]))); + gaussianBlur (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer); } #ifdef __SSE2__ @@ -363,7 +364,7 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e } } } - + delete [] buffer; delete [] outBuffer; delete [] srcBuffer;