From 10b085b01e06b3094f298563085009d73501810c Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 5 Dec 2019 21:40:32 +0100 Subject: [PATCH] lmmse demosaic: own compilation unit and some cleanups --- rtengine/CMakeLists.txt | 1 + rtengine/demosaic_algos.cc | 985 ------------------------------------- rtengine/lmmse_demosaic.cc | 824 +++++++++++++++++++++++++++++++ rtengine/rawimagesource.h | 3 +- 4 files changed, 826 insertions(+), 987 deletions(-) create mode 100644 rtengine/lmmse_demosaic.cc diff --git a/rtengine/CMakeLists.txt b/rtengine/CMakeLists.txt index f58afde5e..6bce68d1a 100644 --- a/rtengine/CMakeLists.txt +++ b/rtengine/CMakeLists.txt @@ -103,6 +103,7 @@ set(RTENGINESOURCEFILES labimage.cc lcp.cc lj92.c + lmmse_demosaic.cc loadinitial.cc myfile.cc panasonic_decoders.cc diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index d547d3431..c48a002fb 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -21,11 +21,9 @@ #include "rawimagesource.h" #include "rawimage.h" -#include "mytime.h" #include "rt_math.h" #include "color.h" #include "../rtgui/multilangmgr.h" -#include "sleef.h" #include "opthelper.h" #include "median.h" //#define BENCHMARK @@ -39,16 +37,6 @@ using namespace std; namespace rtengine { -#undef ABS - -#define ABS(a) ((a)<0?-(a):(a)) -#define CLIREF(x) LIM(x,-200000.0f,200000.0f) // avoid overflow : do not act directly on image[] or pix[] -#define x1125(a) (a + xdivf(a, 3)) -#define x0875(a) (a - xdivf(a, 3)) -#define x0250(a) xdivf(a, 2) -#define x00625(a) xdivf(a, 4) -#define x0125(a) xdivf(a, 3) - #undef fc #define fc(row,col) \ (ri->get_filters() >> ((((row) << 1 & 14) + ((col) & 1)) << 1) & 3) @@ -210,628 +198,6 @@ void RawImageSource::border_interpolate( int winw, int winh, int lborders, const } -// LSMME demosaicing algorithm -// L. Zhang and X. Wu, -// Color demozaicing via directional Linear Minimum Mean Square-error Estimation, -// IEEE Trans. on Image Processing, vol. 14, pp. 2167-2178, -// Dec. 2005. -// Adapted to RawTherapee by Jacques Desmis 3/2013 -// Improved speed and reduced memory consumption by Ingo Weyrich 2/2015 -//TODO Tiles to reduce memory consumption -void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations) -{ - const int width = winw, height = winh; - const int ba = 10; - const int rr1 = height + 2 * ba; - const int cc1 = width + 2 * ba; - const int w1 = cc1; - const int w2 = 2 * w1; - const int w3 = 3 * w1; - const int w4 = 4 * w1; - float h0, h1, h2, h3, h4, hs; - h0 = 1.0f; - h1 = exp( -1.0f / 8.0f); - h2 = exp( -4.0f / 8.0f); - h3 = exp( -9.0f / 8.0f); - h4 = exp(-16.0f / 8.0f); - hs = h0 + 2.0f * (h1 + h2 + h3 + h4); - h0 /= hs; - h1 /= hs; - h2 /= hs; - h3 /= hs; - h4 /= hs; - int passref = 0; - int iter = 0; - - if(iterations <= 4) { - iter = iterations - 1; - passref = 0; - } else if (iterations <= 6) { - iter = 3; - passref = iterations - 4; - } else if (iterations <= 8) { - iter = 3; - passref = iterations - 6; - } - - bool applyGamma = true; - - if(iterations == 0) { - applyGamma = false; - iter = 0; - } else { - applyGamma = true; - } - - float *rix[5]; - float *qix[5]; - float *buffer = (float *)calloc(static_cast(rr1) * cc1 * 5 * sizeof(float), 1); - - if(buffer == nullptr) { // allocation of big block of memory failed, try to get 5 smaller ones - printf("lmmse_interpolate_omp: allocation of big memory block failed, try to get 5 smaller ones now...\n"); - bool allocationFailed = false; - - for(int i = 0; i < 5; i++) { - qix[i] = (float *)calloc(static_cast(rr1) * cc1 * sizeof(float), 1); - - if(!qix[i]) { // allocation of at least one small block failed - allocationFailed = true; - } - } - - if(allocationFailed) { // fall back to igv_interpolate - printf("lmmse_interpolate_omp: allocation of 5 small memory blocks failed, falling back to igv_interpolate...\n"); - - for(int i = 0; i < 5; i++) { // free the already allocated buffers - if(qix[i]) { - free(qix[i]); - } - } - - igv_interpolate(winw, winh); - return; - } - } else { - qix[0] = buffer; - - for(int i = 1; i < 5; i++) { - qix[i] = qix[i - 1] + rr1 * cc1; - } - } - - if (plistener) { - plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), M("TP_RAW_LMMSE"))); - plistener->setProgress (0.0); - } - - - LUTf *gamtab; - - if(applyGamma) { - gamtab = &(Color::gammatab_24_17a); - } else { - gamtab = new LUTf(65536, LUT_CLIP_ABOVE | LUT_CLIP_BELOW); - gamtab->makeIdentity(65535.f); - } - - -#ifdef _OPENMP - #pragma omp parallel private(rix) -#endif - { -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rrr = ba; rrr < rr1 - ba; rrr++) { - for (int ccc = ba, row = rrr - ba; ccc < cc1 - ba; ccc++) { - int col = ccc - ba; - float *rix = qix[4] + rrr * cc1 + ccc; - rix[0] = (*gamtab)[rawData[row][col]]; - } - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.1); - } - } - - // G-R(B) -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif - - for (int rr = 2; rr < rr1 - 2; rr++) { - // G-R(B) at R(B) location - for (int cc = 2 + (FC(rr, 2) & 1); cc < cc1 - 2; cc += 2) { - rix[4] = qix[4] + rr * cc1 + cc; - float v0 = x00625(rix[4][-w1 - 1] + rix[4][-w1 + 1] + rix[4][w1 - 1] + rix[4][w1 + 1]) + x0250(rix[4][0]); - // horizontal - rix[0] = qix[0] + rr * cc1 + cc; - rix[0][0] = - x0250(rix[4][ -2] + rix[4][ 2]) + xdiv2f(rix[4][ -1] + rix[4][0] + rix[4][ 1]); - float Y = v0 + xdiv2f(rix[0][0]); - - if (rix[4][0] > 1.75f * Y) { - rix[0][0] = median(rix[0][0], rix[4][ -1], rix[4][ 1]); - } else { - rix[0][0] = LIM(rix[0][0], 0.0f, 1.0f); - } - - rix[0][0] -= rix[4][0]; - // vertical - rix[1] = qix[1] + rr * cc1 + cc; - rix[1][0] = -x0250(rix[4][-w2] + rix[4][w2]) + xdiv2f(rix[4][-w1] + rix[4][0] + rix[4][w1]); - Y = v0 + xdiv2f(rix[1][0]); - - if (rix[4][0] > 1.75f * Y) { - rix[1][0] = median(rix[1][0], rix[4][-w1], rix[4][w1]); - } else { - rix[1][0] = LIM(rix[1][0], 0.0f, 1.0f); - } - - rix[1][0] -= rix[4][0]; - } - - // G-R(B) at G location - for (int ccc = 2 + (FC(rr, 3) & 1); ccc < cc1 - 2; ccc += 2) { - rix[0] = qix[0] + rr * cc1 + ccc; - rix[1] = qix[1] + rr * cc1 + ccc; - rix[4] = qix[4] + rr * cc1 + ccc; - rix[0][0] = x0250(rix[4][ -2] + rix[4][ 2]) - xdiv2f(rix[4][ -1] + rix[4][0] + rix[4][ 1]); - rix[1][0] = x0250(rix[4][-w2] + rix[4][w2]) - xdiv2f(rix[4][-w1] + rix[4][0] + rix[4][w1]); - rix[0][0] = LIM(rix[0][0], -1.0f, 0.0f) + rix[4][0]; - rix[1][0] = LIM(rix[1][0], -1.0f, 0.0f) + rix[4][0]; - } - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.2); - } - } - - - // apply low pass filter on differential colors -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rr = 4; rr < rr1 - 4; rr++) - for (int cc = 4; cc < cc1 - 4; cc++) { - rix[0] = qix[0] + rr * cc1 + cc; - rix[2] = qix[2] + rr * cc1 + cc; - rix[2][0] = h0 * rix[0][0] + h1 * (rix[0][ -1] + rix[0][ 1]) + h2 * (rix[0][ -2] + rix[0][ 2]) + h3 * (rix[0][ -3] + rix[0][ 3]) + h4 * (rix[0][ -4] + rix[0][ 4]); - rix[1] = qix[1] + rr * cc1 + cc; - rix[3] = qix[3] + rr * cc1 + cc; - rix[3][0] = h0 * rix[1][0] + h1 * (rix[1][-w1] + rix[1][w1]) + h2 * (rix[1][-w2] + rix[1][w2]) + h3 * (rix[1][-w3] + rix[1][w3]) + h4 * (rix[1][-w4] + rix[1][w4]); - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.3); - } - } - - // interpolate G-R(B) at R(B) -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rr = 4; rr < rr1 - 4; rr++) { - int cc = 4 + (FC(rr, 4) & 1); -#ifdef __SSE2__ - __m128 p1v, p2v, p3v, p4v, p5v, p6v, p7v, p8v, p9v, muv, vxv, vnv, xhv, vhv, xvv, vvv; - __m128 epsv = _mm_set1_ps(1e-7); - __m128 ninev = _mm_set1_ps(9.f); - - for (; cc < cc1 - 10; cc += 8) { - rix[0] = qix[0] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - rix[2] = qix[2] + rr * cc1 + cc; - rix[3] = qix[3] + rr * cc1 + cc; - rix[4] = qix[4] + rr * cc1 + cc; - // horizontal - p1v = LC2VFU(rix[2][-4]); - p2v = LC2VFU(rix[2][-3]); - p3v = LC2VFU(rix[2][-2]); - p4v = LC2VFU(rix[2][-1]); - p5v = LC2VFU(rix[2][ 0]); - p6v = LC2VFU(rix[2][ 1]); - p7v = LC2VFU(rix[2][ 2]); - p8v = LC2VFU(rix[2][ 3]); - p9v = LC2VFU(rix[2][ 4]); - muv = (p1v + p2v + p3v + p4v + p5v + p6v + p7v + p8v + p9v) / ninev; - vxv = epsv + SQRV(p1v - muv) + SQRV(p2v - muv) + SQRV(p3v - muv) + SQRV(p4v - muv) + SQRV(p5v - muv) + SQRV(p6v - muv) + SQRV(p7v - muv) + SQRV(p8v - muv) + SQRV(p9v - muv); - p1v -= LC2VFU(rix[0][-4]); - p2v -= LC2VFU(rix[0][-3]); - p3v -= LC2VFU(rix[0][-2]); - p4v -= LC2VFU(rix[0][-1]); - p5v -= LC2VFU(rix[0][ 0]); - p6v -= LC2VFU(rix[0][ 1]); - p7v -= LC2VFU(rix[0][ 2]); - p8v -= LC2VFU(rix[0][ 3]); - p9v -= LC2VFU(rix[0][ 4]); - vnv = epsv + SQRV(p1v) + SQRV(p2v) + SQRV(p3v) + SQRV(p4v) + SQRV(p5v) + SQRV(p6v) + SQRV(p7v) + SQRV(p8v) + SQRV(p9v); - xhv = (LC2VFU(rix[0][0]) * vxv + LC2VFU(rix[2][0]) * vnv) / (vxv + vnv); - vhv = vxv * vnv / (vxv + vnv); - - // vertical - p1v = LC2VFU(rix[3][-w4]); - p2v = LC2VFU(rix[3][-w3]); - p3v = LC2VFU(rix[3][-w2]); - p4v = LC2VFU(rix[3][-w1]); - p5v = LC2VFU(rix[3][ 0]); - p6v = LC2VFU(rix[3][ w1]); - p7v = LC2VFU(rix[3][ w2]); - p8v = LC2VFU(rix[3][ w3]); - p9v = LC2VFU(rix[3][ w4]); - muv = (p1v + p2v + p3v + p4v + p5v + p6v + p7v + p8v + p9v) / ninev; - vxv = epsv + SQRV(p1v - muv) + SQRV(p2v - muv) + SQRV(p3v - muv) + SQRV(p4v - muv) + SQRV(p5v - muv) + SQRV(p6v - muv) + SQRV(p7v - muv) + SQRV(p8v - muv) + SQRV(p9v - muv); - p1v -= LC2VFU(rix[1][-w4]); - p2v -= LC2VFU(rix[1][-w3]); - p3v -= LC2VFU(rix[1][-w2]); - p4v -= LC2VFU(rix[1][-w1]); - p5v -= LC2VFU(rix[1][ 0]); - p6v -= LC2VFU(rix[1][ w1]); - p7v -= LC2VFU(rix[1][ w2]); - p8v -= LC2VFU(rix[1][ w3]); - p9v -= LC2VFU(rix[1][ w4]); - vnv = epsv + SQRV(p1v) + SQRV(p2v) + SQRV(p3v) + SQRV(p4v) + SQRV(p5v) + SQRV(p6v) + SQRV(p7v) + SQRV(p8v) + SQRV(p9v); - xvv = (LC2VFU(rix[1][0]) * vxv + LC2VFU(rix[3][0]) * vnv) / (vxv + vnv); - vvv = vxv * vnv / (vxv + vnv); - // interpolated G-R(B) - muv = (xhv * vvv + xvv * vhv) / (vhv + vvv); - STC2VFU(rix[4][0], muv); - } - -#endif - - for (; cc < cc1 - 4; cc += 2) { - rix[0] = qix[0] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - rix[2] = qix[2] + rr * cc1 + cc; - rix[3] = qix[3] + rr * cc1 + cc; - rix[4] = qix[4] + rr * cc1 + cc; - // horizontal - float p1 = rix[2][-4]; - float p2 = rix[2][-3]; - float p3 = rix[2][-2]; - float p4 = rix[2][-1]; - float p5 = rix[2][ 0]; - float p6 = rix[2][ 1]; - float p7 = rix[2][ 2]; - float p8 = rix[2][ 3]; - float p9 = rix[2][ 4]; - float mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.f; - float vx = 1e-7 + SQR(p1 - mu) + SQR(p2 - mu) + SQR(p3 - mu) + SQR(p4 - mu) + SQR(p5 - mu) + SQR(p6 - mu) + SQR(p7 - mu) + SQR(p8 - mu) + SQR(p9 - mu); - p1 -= rix[0][-4]; - p2 -= rix[0][-3]; - p3 -= rix[0][-2]; - p4 -= rix[0][-1]; - p5 -= rix[0][ 0]; - p6 -= rix[0][ 1]; - p7 -= rix[0][ 2]; - p8 -= rix[0][ 3]; - p9 -= rix[0][ 4]; - float vn = 1e-7 + SQR(p1) + SQR(p2) + SQR(p3) + SQR(p4) + SQR(p5) + SQR(p6) + SQR(p7) + SQR(p8) + SQR(p9); - float xh = (rix[0][0] * vx + rix[2][0] * vn) / (vx + vn); - float vh = vx * vn / (vx + vn); - - // vertical - p1 = rix[3][-w4]; - p2 = rix[3][-w3]; - p3 = rix[3][-w2]; - p4 = rix[3][-w1]; - p5 = rix[3][ 0]; - p6 = rix[3][ w1]; - p7 = rix[3][ w2]; - p8 = rix[3][ w3]; - p9 = rix[3][ w4]; - mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.f; - vx = 1e-7 + SQR(p1 - mu) + SQR(p2 - mu) + SQR(p3 - mu) + SQR(p4 - mu) + SQR(p5 - mu) + SQR(p6 - mu) + SQR(p7 - mu) + SQR(p8 - mu) + SQR(p9 - mu); - p1 -= rix[1][-w4]; - p2 -= rix[1][-w3]; - p3 -= rix[1][-w2]; - p4 -= rix[1][-w1]; - p5 -= rix[1][ 0]; - p6 -= rix[1][ w1]; - p7 -= rix[1][ w2]; - p8 -= rix[1][ w3]; - p9 -= rix[1][ w4]; - vn = 1e-7 + SQR(p1) + SQR(p2) + SQR(p3) + SQR(p4) + SQR(p5) + SQR(p6) + SQR(p7) + SQR(p8) + SQR(p9); - float xv = (rix[1][0] * vx + rix[3][0] * vn) / (vx + vn); - float vv = vx * vn / (vx + vn); - // interpolated G-R(B) - rix[4][0] = (xh * vv + xv * vh) / (vh + vv); - } - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.4); - } - } - - // copy CFA values -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rr = 0; rr < rr1; rr++) - for (int cc = 0, row = rr - ba; cc < cc1; cc++) { - int col = cc - ba; - int c = FC(rr, cc); - rix[c] = qix[c] + rr * cc1 + cc; - - if ((row >= 0) & (row < height) & (col >= 0) & (col < width)) { - rix[c][0] = (*gamtab)[rawData[row][col]]; - } else { - rix[c][0] = 0.f; - } - - if (c != 1) { - rix[1] = qix[1] + rr * cc1 + cc; - rix[4] = qix[4] + rr * cc1 + cc; - rix[1][0] = rix[c][0] + rix[4][0]; - } - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.5); - } - } - - // bilinear interpolation for R/B - // interpolate R/B at G location -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rr = 1; rr < rr1 - 1; rr++) - for (int cc = 1 + (FC(rr, 2) & 1), c = FC(rr, cc + 1); cc < cc1 - 1; cc += 2) { - rix[c] = qix[c] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - rix[c][0] = rix[1][0] + xdiv2f(rix[c][ -1] - rix[1][ -1] + rix[c][ 1] - rix[1][ 1]); - c = 2 - c; - rix[c] = qix[c] + rr * cc1 + cc; - rix[c][0] = rix[1][0] + xdiv2f(rix[c][-w1] - rix[1][-w1] + rix[c][w1] - rix[1][w1]); - c = 2 - c; - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.6); - } - } - - // interpolate R/B at B/R location -#ifdef _OPENMP - #pragma omp for -#endif - - for (int rr = 1; rr < rr1 - 1; rr++) - for (int cc = 1 + (FC(rr, 1) & 1), c = 2 - FC(rr, cc); cc < cc1 - 1; cc += 2) { - rix[c] = qix[c] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - rix[c][0] = rix[1][0] + x0250(rix[c][-w1] - rix[1][-w1] + rix[c][ -1] - rix[1][ -1] + rix[c][ 1] - rix[1][ 1] + rix[c][ w1] - rix[1][ w1]); - } - -#ifdef _OPENMP - #pragma omp single -#endif - { - if (plistener) { - plistener->setProgress (0.7); - } - } - - }// End of parallelization 1 - - // median filter/ - for (int pass = 0; pass < iter; pass++) { - // Apply 3x3 median filter - // Compute median(R-G) and median(B-G) - -#ifdef _OPENMP - #pragma omp parallel for private(rix) -#endif - - for (int rr = 1; rr < rr1 - 1; rr++) { - for (int c = 0; c < 3; c += 2) { - int d = c + 3 - (c == 0 ? 0 : 1); - int cc = 1; -#ifdef __SSE2__ - - for (; cc < cc1 - 4; cc += 4) { - rix[d] = qix[d] + rr * cc1 + cc; - rix[c] = qix[c] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - // Assign 3x3 differential color values - const std::array p = { - LVFU(rix[c][-w1 - 1]) - LVFU(rix[1][-w1 - 1]), - LVFU(rix[c][-w1]) - LVFU(rix[1][-w1]), - LVFU(rix[c][-w1 + 1]) - LVFU(rix[1][-w1 + 1]), - LVFU(rix[c][ -1]) - LVFU(rix[1][ -1]), - LVFU(rix[c][ 0]) - LVFU(rix[1][ 0]), - LVFU(rix[c][ 1]) - LVFU(rix[1][ 1]), - LVFU(rix[c][ w1 - 1]) - LVFU(rix[1][ w1 - 1]), - LVFU(rix[c][ w1]) - LVFU(rix[1][ w1]), - LVFU(rix[c][ w1 + 1]) - LVFU(rix[1][ w1 + 1]) - }; - _mm_storeu_ps(&rix[d][0], median(p)); - } - -#endif - - for (; cc < cc1 - 1; cc++) { - rix[d] = qix[d] + rr * cc1 + cc; - rix[c] = qix[c] + rr * cc1 + cc; - rix[1] = qix[1] + rr * cc1 + cc; - // Assign 3x3 differential color values - const std::array p = { - rix[c][-w1 - 1] - rix[1][-w1 - 1], - rix[c][-w1] - rix[1][-w1], - rix[c][-w1 + 1] - rix[1][-w1 + 1], - rix[c][ -1] - rix[1][ -1], - rix[c][ 0] - rix[1][ 0], - rix[c][ 1] - rix[1][ 1], - rix[c][ w1 - 1] - rix[1][ w1 - 1], - rix[c][ w1] - rix[1][ w1], - rix[c][ w1 + 1] - rix[1][ w1 + 1] - }; - rix[d][0] = median(p); - } - } - } - - // red/blue at GREEN pixel locations & red/blue and green at BLUE/RED pixel locations -#ifdef _OPENMP - #pragma omp parallel for private (rix) -#endif - - for (int rr = 0; rr < rr1; rr++) { - rix[0] = qix[0] + rr * cc1; - rix[1] = qix[1] + rr * cc1; - rix[2] = qix[2] + rr * cc1; - rix[3] = qix[3] + rr * cc1; - rix[4] = qix[4] + rr * cc1; - int c0 = FC(rr, 0); - int c1 = FC(rr, 1); - - if(c0 == 1) { - c1 = 2 - c1; - int d = c1 + 3 - (c1 == 0 ? 0 : 1); - int cc; - - for (cc = 0; cc < cc1 - 1; cc += 2) { - rix[0][0] = rix[1][0] + rix[3][0]; - rix[2][0] = rix[1][0] + rix[4][0]; - rix[0]++; - rix[1]++; - rix[2]++; - rix[3]++; - rix[4]++; - rix[c1][0] = rix[1][0] + rix[d][0]; - rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); - rix[0]++; - rix[1]++; - rix[2]++; - rix[3]++; - rix[4]++; - } - - if(cc < cc1) { // remaining pixel, only if width is odd - rix[0][0] = rix[1][0] + rix[3][0]; - rix[2][0] = rix[1][0] + rix[4][0]; - } - } else { - c0 = 2 - c0; - int d = c0 + 3 - (c0 == 0 ? 0 : 1); - int cc; - - for (cc = 0; cc < cc1 - 1; cc += 2) { - rix[c0][0] = rix[1][0] + rix[d][0]; - rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); - rix[0]++; - rix[1]++; - rix[2]++; - rix[3]++; - rix[4]++; - rix[0][0] = rix[1][0] + rix[3][0]; - rix[2][0] = rix[1][0] + rix[4][0]; - rix[0]++; - rix[1]++; - rix[2]++; - rix[3]++; - rix[4]++; - } - - if(cc < cc1) { // remaining pixel, only if width is odd - rix[c0][0] = rix[1][0] + rix[d][0]; - rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); - } - } - } - } - - if (plistener) { - plistener->setProgress (0.8); - } - - if(applyGamma) { - gamtab = &(Color::igammatab_24_17); - } else { - gamtab->makeIdentity(); - } - - array2D* rgb[3]; - rgb[0] = &red; - rgb[1] = &green; - rgb[2] = &blue; - - // copy result back to image matrix -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int row = 0; row < height; row++) { - for (int col = 0, rr = row + ba; col < width; col++) { - int cc = col + ba; - int c = FC(row, col); - - for (int ii = 0; ii < 3; ii++) - if (ii != c) { - float *rix = qix[ii] + rr * cc1 + cc; - (*(rgb[ii]))[row][col] = (*gamtab)[65535.f * rix[0]]; - } else { - (*(rgb[ii]))[row][col] = CLIP(rawData[row][col]); - } - } - } - - if (plistener) { - plistener->setProgress (1.0); - } - - if(buffer) { - free(buffer); - } else - for(int i = 0; i < 5; i++) { - free(qix[i]); - } - - if(!applyGamma) { - delete gamtab; - } - - if(iterations > 4 && iterations <= 6) { - refinement(passref); - } else if(iterations > 6) { - refinement_lassus(passref); - } - -} - /*** * * Bayer CFA Demosaicing using Integrated Gaussian Vector on Color Differences @@ -1553,357 +919,6 @@ void RawImageSource::nodemosaic(bool bw) } } -/* - Refinement based on EECI demosaicing algorithm by L. Chang and Y.P. Tan - Paul Lee - Adapted for RawTherapee - Jacques Desmis 04/2013 -*/ - -#ifdef __SSE2__ -#define CLIPV(a) vclampf(a,ZEROV,c65535v) -#endif -void RawImageSource::refinement(int PassCount) -{ - MyTime t1e, t2e; - t1e.set(); - - int width = W; - int height = H; - int w1 = width; - int w2 = 2 * w1; - - if (plistener) { - plistener->setProgressStr (M("TP_RAW_DMETHOD_PROGRESSBAR_REFINE")); - } - - array2D *rgb[3]; - rgb[0] = &red; - rgb[1] = &green; - rgb[2] = &blue; - - for (int b = 0; b < PassCount; b++) { - if (plistener) { - plistener->setProgress ((float)b / PassCount); - } - - -#ifdef _OPENMP - #pragma omp parallel -#endif - { - float *pix[3]; - - /* Reinforce interpolated green pixels on RED/BLUE pixel locations */ -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 2; row < height - 2; row++) { - int col = 2 + (FC(row, 2) & 1); - int c = FC(row, col); -#ifdef __SSE2__ - __m128 dLv, dRv, dUv, dDv, v0v; - __m128 onev = _mm_set1_ps(1.f); - __m128 zd5v = _mm_set1_ps(0.5f); - __m128 c65535v = _mm_set1_ps(65535.f); - - for (; col < width - 8; col += 8) { - int indx = row * width + col; - pix[c] = (float*)(*rgb[c]) + indx; - pix[1] = (float*)(*rgb[1]) + indx; - dLv = onev / (onev + vabsf(LC2VFU(pix[c][ -2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); - dRv = onev / (onev + vabsf(LC2VFU(pix[c][ 2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); - dUv = onev / (onev + vabsf(LC2VFU(pix[c][-w2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); - dDv = onev / (onev + vabsf(LC2VFU(pix[c][ w2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); - v0v = CLIPV(LC2VFU(pix[c][0]) + zd5v + ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); - STC2VFU(pix[1][0], v0v); - } - -#endif - - for (; col < width - 2; col += 2) { - int indx = row * width + col; - pix[c] = (float*)(*rgb[c]) + indx; - pix[1] = (float*)(*rgb[1]) + indx; - float dL = 1.f / (1.f + fabsf(pix[c][ -2] - pix[c][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); - float dR = 1.f / (1.f + fabsf(pix[c][ 2] - pix[c][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); - float dU = 1.f / (1.f + fabsf(pix[c][-w2] - pix[c][0]) + fabsf(pix[1][w1] - pix[1][-w1])); - float dD = 1.f / (1.f + fabsf(pix[c][ w2] - pix[c][0]) + fabsf(pix[1][w1] - pix[1][-w1])); - float v0 = (pix[c][0] + 0.5f + ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); - pix[1][0] = CLIP(v0); - } - } - - /* Reinforce interpolated red/blue pixels on GREEN pixel locations */ -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 2; row < height - 2; row++) { - int col = 2 + (FC(row, 3) & 1); - int c = FC(row, col + 1); -#ifdef __SSE2__ - __m128 dLv, dRv, dUv, dDv, v0v; - __m128 onev = _mm_set1_ps(1.f); - __m128 zd5v = _mm_set1_ps(0.5f); - __m128 c65535v = _mm_set1_ps(65535.f); - - for (; col < width - 8; col += 8) { - int indx = row * width + col; - pix[1] = (float*)(*rgb[1]) + indx; - - for (int i = 0; i < 2; c = 2 - c, i++) { - pix[c] = (float*)(*rgb[c]) + indx; - dLv = onev / (onev + vabsf(LC2VFU(pix[1][ -2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][ 1]) - LC2VFU(pix[c][ -1]))); - dRv = onev / (onev + vabsf(LC2VFU(pix[1][ 2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][ 1]) - LC2VFU(pix[c][ -1]))); - dUv = onev / (onev + vabsf(LC2VFU(pix[1][-w2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][w1]) - LC2VFU(pix[c][-w1]))); - dDv = onev / (onev + vabsf(LC2VFU(pix[1][ w2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][w1]) - LC2VFU(pix[c][-w1]))); - v0v = CLIPV(LC2VFU(pix[1][0]) + zd5v - ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); - STC2VFU(pix[c][0], v0v); - } - } - -#endif - - for (; col < width - 2; col += 2) { - int indx = row * width + col; - pix[1] = (float*)(*rgb[1]) + indx; - - for (int i = 0; i < 2; c = 2 - c, i++) { - pix[c] = (float*)(*rgb[c]) + indx; - float dL = 1.f / (1.f + fabsf(pix[1][ -2] - pix[1][0]) + fabsf(pix[c][ 1] - pix[c][ -1])); - float dR = 1.f / (1.f + fabsf(pix[1][ 2] - pix[1][0]) + fabsf(pix[c][ 1] - pix[c][ -1])); - float dU = 1.f / (1.f + fabsf(pix[1][-w2] - pix[1][0]) + fabsf(pix[c][w1] - pix[c][-w1])); - float dD = 1.f / (1.f + fabsf(pix[1][ w2] - pix[1][0]) + fabsf(pix[c][w1] - pix[c][-w1])); - float v0 = (pix[1][0] + 0.5f - ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); - pix[c][0] = CLIP(v0); - } - } - } - - /* Reinforce integrated red/blue pixels on BLUE/RED pixel locations */ -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 2; row < height - 2; row++) { - int col = 2 + (FC(row, 2) & 1); - int c = 2 - FC(row, col); -#ifdef __SSE2__ - __m128 dLv, dRv, dUv, dDv, v0v; - __m128 onev = _mm_set1_ps(1.f); - __m128 zd5v = _mm_set1_ps(0.5f); - __m128 c65535v = _mm_set1_ps(65535.f); - - for (; col < width - 8; col += 8) { - int indx = row * width + col; - pix[0] = (float*)(*rgb[0]) + indx; - pix[1] = (float*)(*rgb[1]) + indx; - pix[2] = (float*)(*rgb[2]) + indx; - int d = 2 - c; - dLv = onev / (onev + vabsf(LC2VFU(pix[d][ -2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); - dRv = onev / (onev + vabsf(LC2VFU(pix[d][ 2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); - dUv = onev / (onev + vabsf(LC2VFU(pix[d][-w2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); - dDv = onev / (onev + vabsf(LC2VFU(pix[d][ w2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); - v0v = CLIPV(LC2VFU(pix[1][0]) + zd5v - ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); - STC2VFU(pix[c][0], v0v); - } - -#endif - - for (; col < width - 2; col += 2) { - int indx = row * width + col; - pix[0] = (float*)(*rgb[0]) + indx; - pix[1] = (float*)(*rgb[1]) + indx; - pix[2] = (float*)(*rgb[2]) + indx; - int d = 2 - c; - float dL = 1.f / (1.f + fabsf(pix[d][ -2] - pix[d][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); - float dR = 1.f / (1.f + fabsf(pix[d][ 2] - pix[d][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); - float dU = 1.f / (1.f + fabsf(pix[d][-w2] - pix[d][0]) + fabsf(pix[1][w1] - pix[1][-w1])); - float dD = 1.f / (1.f + fabsf(pix[d][ w2] - pix[d][0]) + fabsf(pix[1][w1] - pix[1][-w1])); - float v0 = (pix[1][0] + 0.5f - ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); - pix[c][0] = CLIP(v0); - } - } - } // end parallel - } - - t2e.set(); - - if (settings->verbose) { - printf("Refinement Lee %d usec\n", t2e.etime(t1e)); - } -} -#ifdef __SSE2__ -#undef CLIPV -#endif - - -// Refinement based on EECI demozaicing algorithm by L. Chang and Y.P. Tan -// from "Lassus" : Luis Sanz Rodriguez, adapted by Jacques Desmis - JDC - and Oliver Duis for RawTherapee -// increases the signal to noise ratio (PSNR) # +1 to +2 dB : tested with Dcraw : -// eg: Lighthouse + AMaZE : without refinement:39.96 dB, with refinement:41.86 dB -// reduce color artifacts, improves the interpolation -// but it's relatively slow -// -// Should be DISABLED if it decreases image quality by increases some image noise and generates blocky edges -void RawImageSource::refinement_lassus(int PassCount) -{ - // const int PassCount=1; - - // if (settings->verbose) printf("Refinement\n"); - - MyTime t1e, t2e; - t1e.set(); - int u = W, v = 2 * u, w = 3 * u, x = 4 * u, y = 5 * u; - float (*image)[3]; - image = (float(*)[3]) calloc(static_cast(W) * H, sizeof * image); -#ifdef _OPENMP - #pragma omp parallel shared(image) -#endif - { - // convert red, blue, green to image -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < H; i++) { - for (int j = 0; j < W; j++) { - image[i * W + j][0] = red [i][j]; - image[i * W + j][1] = green[i][j]; - image[i * W + j][2] = blue [i][j]; - } - } - - for (int b = 0; b < PassCount; b++) { - if (plistener) { - plistener->setProgressStr (M("TP_RAW_DMETHOD_PROGRESSBAR_REFINE")); - plistener->setProgress ((float)b / PassCount); - } - - // Reinforce interpolated green pixels on RED/BLUE pixel locations -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 6; row < H - 6; row++) { - for (int col = 6 + (FC(row, 2) & 1), c = FC(row, col); col < W - 6; col += 2) { - float (*pix)[3] = image + row * W + col; - - // Cubic Spline Interpolation by Li and Randhawa, modified by Luis Sanz Rodriguez - - float f[4]; - f[0] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[-v][c]) - x0875(pix[0][c]) - x0250(pix[-x][c]))) + fabs(x0875(pix[u][1]) - x1125(pix[-u][1]) + x0250(pix[-w][1])) + fabs(x0875(pix[-w][1]) - x1125(pix[-u][1]) + x0250(pix[-y][1]))); - f[1] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[+2][c]) - x0875(pix[0][c]) - x0250(pix[+4][c]))) + fabs(x0875(pix[1][1]) - x1125(pix[-1][1]) + x0250(pix[+3][1])) + fabs(x0875(pix[+3][1]) - x1125(pix[+1][1]) + x0250(pix[+5][1]))); - f[2] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[-2][c]) - x0875(pix[0][c]) - x0250(pix[-4][c]))) + fabs(x0875(pix[1][1]) - x1125(pix[-1][1]) + x0250(pix[-3][1])) + fabs(x0875(pix[-3][1]) - x1125(pix[-1][1]) + x0250(pix[-5][1]))); - f[3] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[+v][c]) - x0875(pix[0][c]) - x0250(pix[+x][c]))) + fabs(x0875(pix[u][1]) - x1125(pix[-u][1]) + x0250(pix[+w][1])) + fabs(x0875(pix[+w][1]) - x1125(pix[+u][1]) + x0250(pix[+y][1]))); - - float g[4];//CLIREF avoid overflow - g[0] = pix[0][c] + (x0875(CLIREF(pix[-u][1] - pix[-u][c])) + x0125(CLIREF(pix[+u][1] - pix[+u][c]))); - g[1] = pix[0][c] + (x0875(CLIREF(pix[+1][1] - pix[+1][c])) + x0125(CLIREF(pix[-1][1] - pix[-1][c]))); - g[2] = pix[0][c] + (x0875(CLIREF(pix[-1][1] - pix[-1][c])) + x0125(CLIREF(pix[+1][1] - pix[+1][c]))); - g[3] = pix[0][c] + (x0875(CLIREF(pix[+u][1] - pix[+u][c])) + x0125(CLIREF(pix[-u][1] - pix[-u][c]))); - - pix[0][1] = (f[0] * g[0] + f[1] * g[1] + f[2] * g[2] + f[3] * g[3]) / (f[0] + f[1] + f[2] + f[3]); - - } - } - - // Reinforce interpolated red/blue pixels on GREEN pixel locations -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 6; row < H - 6; row++) { - for (int col = 6 + (FC(row, 3) & 1), c = FC(row, col + 1); col < W - 6; col += 2) { - float (*pix)[3] = image + row * W + col; - - for (int i = 0; i < 2; c = 2 - c, i++) { - float f[4]; - f[0] = 1.0f / (1.0f + xmul2f(fabs(x0875(pix[-v][1]) - x1125(pix[0][1]) + x0250(pix[-x][1]))) + fabs(pix[u] [c] - pix[-u][c]) + fabs(pix[-w][c] - pix[-u][c])); - f[1] = 1.0f / (1.0f + xmul2f(fabs(x0875(pix[+2][1]) - x1125(pix[0][1]) + x0250(pix[+4][1]))) + fabs(pix[+1][c] - pix[-1][c]) + fabs(pix[+3][c] - pix[+1][c])); - f[2] = 1.0f / (1.0f + xmul2f(fabs(x0875(pix[-2][1]) - x1125(pix[0][1]) + x0250(pix[-4][1]))) + fabs(pix[+1][c] - pix[-1][c]) + fabs(pix[-3][c] - pix[-1][c])); - f[3] = 1.0f / (1.0f + xmul2f(fabs(x0875(pix[+v][1]) - x1125(pix[0][1]) + x0250(pix[+x][1]))) + fabs(pix[u] [c] - pix[-u][c]) + fabs(pix[+w][c] - pix[+u][c])); - - float g[5];//CLIREF avoid overflow - g[0] = CLIREF(pix[-u][1] - pix[-u][c]); - g[1] = CLIREF(pix[+1][1] - pix[+1][c]); - g[2] = CLIREF(pix[-1][1] - pix[-1][c]); - g[3] = CLIREF(pix[+u][1] - pix[+u][c]); - g[4] = ((f[0] * g[0] + f[1] * g[1] + f[2] * g[2] + f[3] * g[3]) / (f[0] + f[1] + f[2] + f[3])); - pix[0][c] = pix[0][1] - (0.65f * g[4] + 0.35f * CLIREF(pix[0][1] - pix[0][c])); - } - } - } - - // Reinforce integrated red/blue pixels on BLUE/RED pixel locations -#ifdef _OPENMP - #pragma omp for -#endif - - for (int row = 6; row < H - 6; row++) { - for (int col = 6 + (FC(row, 2) & 1), c = 2 - FC(row, col), d = 2 - c; col < W - 6; col += 2) { - float (*pix)[3] = image + row * W + col; - - float f[4]; - f[0] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[-v][d]) - x0875(pix[0][d]) - x0250(pix[-x][d]))) + fabs(x0875(pix[u][1]) - x1125(pix[-u][1]) + x0250(pix[-w][1])) + fabs(x0875(pix[-w][1]) - x1125(pix[-u][1]) + x0250(pix[-y][1]))); - f[1] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[+2][d]) - x0875(pix[0][d]) - x0250(pix[+4][d]))) + fabs(x0875(pix[1][1]) - x1125(pix[-1][1]) + x0250(pix[+3][1])) + fabs(x0875(pix[+3][1]) - x1125(pix[+1][1]) + x0250(pix[+5][1]))); - f[2] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[-2][d]) - x0875(pix[0][d]) - x0250(pix[-4][d]))) + fabs(x0875(pix[1][1]) - x1125(pix[-1][1]) + x0250(pix[-3][1])) + fabs(x0875(pix[-3][1]) - x1125(pix[-1][1]) + x0250(pix[-5][1]))); - f[3] = 1.0f / (1.0f + xmul2f(fabs(x1125(pix[+v][d]) - x0875(pix[0][d]) - x0250(pix[+x][d]))) + fabs(x0875(pix[u][1]) - x1125(pix[-u][1]) + x0250(pix[+w][1])) + fabs(x0875(pix[+w][1]) - x1125(pix[+u][1]) + x0250(pix[+y][1]))); - - float g[5]; - g[0] = (x0875((pix[-u][1] - pix[-u][c])) + x0125((pix[-v][1] - pix[-v][c]))); - g[1] = (x0875((pix[+1][1] - pix[+1][c])) + x0125((pix[+2][1] - pix[+2][c]))); - g[2] = (x0875((pix[-1][1] - pix[-1][c])) + x0125((pix[-2][1] - pix[-2][c]))); - g[3] = (x0875((pix[+u][1] - pix[+u][c])) + x0125((pix[+v][1] - pix[+v][c]))); - - g[4] = (f[0] * g[0] + f[1] * g[1] + f[2] * g[2] + f[3] * g[3]) / (f[0] + f[1] + f[2] + f[3]); - - const std::array p = { - pix[-u - 1][1] - pix[-u - 1][c], - pix[-u + 0][1] - pix[-u + 0][c], - pix[-u + 1][1] - pix[-u + 1][c], - pix[+0 - 1][1] - pix[+0 - 1][c], - pix[+0 + 0][1] - pix[+0 + 0][c], - pix[+0 + 1][1] - pix[+0 + 1][c], - pix[+u - 1][1] - pix[+u - 1][c], - pix[+u + 0][1] - pix[+u + 0][c], - pix[+u + 1][1] - pix[+u + 1][c] - }; - - const float med = median(p); - - pix[0][c] = LIM(pix[0][1] - (1.30f * g[4] - 0.30f * (pix[0][1] - pix[0][c])), 0.99f * (pix[0][1] - med), 1.01f * (pix[0][1] - med)); - - } - } - - } - - // put modified values to red, green, blue -#ifdef _OPENMP - #pragma omp for -#endif - - for (int i = 0; i < H; i++) { - for (int j = 0; j < W; j++) { - red [i][j] = image[i * W + j][0]; - green[i][j] = image[i * W + j][1]; - blue [i][j] = image[i * W + j][2]; - } - } - } - - free(image); - - t2e.set(); - - if (settings->verbose) { - printf("Refinement Lassus %d usec\n", t2e.etime(t1e)); - } -} - - /* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are diff --git a/rtengine/lmmse_demosaic.cc b/rtengine/lmmse_demosaic.cc new file mode 100644 index 000000000..6191ca36c --- /dev/null +++ b/rtengine/lmmse_demosaic.cc @@ -0,0 +1,824 @@ +/* + * This file is part of RawTherapee. + * + * Copyright (c) 2004-2019 Gabor Horvath + * + * RawTherapee is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * RawTherapee is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with RawTherapee. If not, see . + */ +#include + +#include "rawimagesource.h" +#include "rt_math.h" +#include "color.h" +#include "../rtgui/multilangmgr.h" +#include "sleef.h" +#include "opthelper.h" +#include "median.h" + +using namespace std; + +namespace rtengine +{ + +// LSMME demosaicing algorithm +// L. Zhang and X. Wu, +// Color demozaicing via directional Linear Minimum Mean Square-error Estimation, +// IEEE Trans. on Image Processing, vol. 14, pp. 2167-2178, +// Dec. 2005. +// Adapted to RawTherapee by Jacques Desmis 3/2013 +// Improved speed and reduced memory consumption by Ingo Weyrich 2/2015 +// TODO Tiles to reduce memory consumption +void RawImageSource::lmmse_interpolate_omp(int winw, int winh, const array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations) +{ + const int width = winw, height = winh; + const int ba = 10; + const int rr1 = height + 2 * ba; + const int cc1 = width + 2 * ba; + const int w1 = cc1; + const int w2 = 2 * w1; + const int w3 = 3 * w1; + const int w4 = 4 * w1; + float h0, h1, h2, h3, h4, hs; + h0 = 1.0f; + h1 = exp( -1.0f / 8.0f); + h2 = exp( -4.0f / 8.0f); + h3 = exp( -9.0f / 8.0f); + h4 = exp(-16.0f / 8.0f); + hs = h0 + 2.0f * (h1 + h2 + h3 + h4); + h0 /= hs; + h1 /= hs; + h2 /= hs; + h3 /= hs; + h4 /= hs; + int passref = 0; + int iter = 0; + + if (iterations <= 4) { + iter = iterations - 1; + passref = 0; + } else if (iterations <= 6) { + iter = 3; + passref = iterations - 4; + } else if (iterations <= 8) { + iter = 3; + passref = iterations - 6; + } + + bool applyGamma = true; + + if (iterations == 0) { + applyGamma = false; + iter = 0; + } else { + applyGamma = true; + } + + float *rix[5]; + float *qix[5]; + float *buffer = (float *)calloc(static_cast(rr1) * cc1 * 5 * sizeof(float), 1); + + if (buffer == nullptr) { // allocation of big block of memory failed, try to get 5 smaller ones + printf("lmmse_interpolate_omp: allocation of big memory block failed, try to get 5 smaller ones now...\n"); + bool allocationFailed = false; + + for (int i = 0; i < 5; i++) { + qix[i] = (float *)calloc(static_cast(rr1) * cc1 * sizeof(float), 1); + + if (!qix[i]) { // allocation of at least one small block failed + allocationFailed = true; + } + } + + if (allocationFailed) { // fall back to igv_interpolate + printf("lmmse_interpolate_omp: allocation of 5 small memory blocks failed, falling back to igv_interpolate...\n"); + + for (int i = 0; i < 5; i++) { // free the already allocated buffers + if (qix[i]) { + free(qix[i]); + } + } + + igv_interpolate(winw, winh); + return; + } + } else { + qix[0] = buffer; + + for (int i = 1; i < 5; i++) { + qix[i] = qix[i - 1] + rr1 * cc1; + } + } + + if (plistener) { + plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), M("TP_RAW_LMMSE"))); + plistener->setProgress (0.0); + } + + + LUTf *gamtab; + + if (applyGamma) { + gamtab = &(Color::gammatab_24_17a); + } else { + gamtab = new LUTf(65536, LUT_CLIP_ABOVE | LUT_CLIP_BELOW); + gamtab->makeIdentity(65535.f); + } + + +#ifdef _OPENMP + #pragma omp parallel private(rix) +#endif + { +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rrr = ba; rrr < rr1 - ba; rrr++) { + for (int ccc = ba, row = rrr - ba; ccc < cc1 - ba; ccc++) { + int col = ccc - ba; + float *rix = qix[4] + rrr * cc1 + ccc; + rix[0] = (*gamtab)[rawData[row][col]]; + } + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.1); + } + } + + // G-R(B) +#ifdef _OPENMP + #pragma omp for schedule(dynamic,16) +#endif + + for (int rr = 2; rr < rr1 - 2; rr++) { + // G-R(B) at R(B) location + for (int cc = 2 + (FC(rr, 2) & 1); cc < cc1 - 2; cc += 2) { + rix[4] = qix[4] + rr * cc1 + cc; + float v0 = 0.0625f * (rix[4][-w1 - 1] + rix[4][-w1 + 1] + rix[4][w1 - 1] + rix[4][w1 + 1]) + 0.25f * (rix[4][0]); + // horizontal + rix[0] = qix[0] + rr * cc1 + cc; + rix[0][0] = -0.25f * (rix[4][ -2] + rix[4][ 2]) + xdiv2f(rix[4][ -1] + rix[4][0] + rix[4][ 1]); + float Y = v0 + xdiv2f(rix[0][0]); + + if (rix[4][0] > 1.75f * Y) { + rix[0][0] = median(rix[0][0], rix[4][ -1], rix[4][ 1]); + } else { + rix[0][0] = LIM(rix[0][0], 0.0f, 1.0f); + } + + rix[0][0] -= rix[4][0]; + // vertical + rix[1] = qix[1] + rr * cc1 + cc; + rix[1][0] = -0.25f * (rix[4][-w2] + rix[4][w2]) + xdiv2f(rix[4][-w1] + rix[4][0] + rix[4][w1]); + Y = v0 + xdiv2f(rix[1][0]); + + if (rix[4][0] > 1.75f * Y) { + rix[1][0] = median(rix[1][0], rix[4][-w1], rix[4][w1]); + } else { + rix[1][0] = LIM(rix[1][0], 0.0f, 1.0f); + } + + rix[1][0] -= rix[4][0]; + } + + // G-R(B) at G location + for (int ccc = 2 + (FC(rr, 3) & 1); ccc < cc1 - 2; ccc += 2) { + rix[0] = qix[0] + rr * cc1 + ccc; + rix[1] = qix[1] + rr * cc1 + ccc; + rix[4] = qix[4] + rr * cc1 + ccc; + rix[0][0] = 0.25f * (rix[4][ -2] + rix[4][ 2]) - xdiv2f(rix[4][ -1] + rix[4][0] + rix[4][ 1]); + rix[1][0] = 0.25f * (rix[4][-w2] + rix[4][w2]) - xdiv2f(rix[4][-w1] + rix[4][0] + rix[4][w1]); + rix[0][0] = LIM(rix[0][0], -1.0f, 0.0f) + rix[4][0]; + rix[1][0] = LIM(rix[1][0], -1.0f, 0.0f) + rix[4][0]; + } + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.2); + } + } + + + // apply low pass filter on differential colors +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rr = 4; rr < rr1 - 4; rr++) + for (int cc = 4; cc < cc1 - 4; cc++) { + rix[0] = qix[0] + rr * cc1 + cc; + rix[2] = qix[2] + rr * cc1 + cc; + rix[2][0] = h0 * rix[0][0] + h1 * (rix[0][ -1] + rix[0][ 1]) + h2 * (rix[0][ -2] + rix[0][ 2]) + h3 * (rix[0][ -3] + rix[0][ 3]) + h4 * (rix[0][ -4] + rix[0][ 4]); + rix[1] = qix[1] + rr * cc1 + cc; + rix[3] = qix[3] + rr * cc1 + cc; + rix[3][0] = h0 * rix[1][0] + h1 * (rix[1][-w1] + rix[1][w1]) + h2 * (rix[1][-w2] + rix[1][w2]) + h3 * (rix[1][-w3] + rix[1][w3]) + h4 * (rix[1][-w4] + rix[1][w4]); + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.3); + } + } + + // interpolate G-R(B) at R(B) +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rr = 4; rr < rr1 - 4; rr++) { + int cc = 4 + (FC(rr, 4) & 1); +#ifdef __SSE2__ + vfloat p1v, p2v, p3v, p4v, p5v, p6v, p7v, p8v, p9v, muv, vxv, vnv, xhv, vhv, xvv, vvv; + vfloat epsv = F2V(1e-7); + vfloat ninev = F2V(9.f); + + for (; cc < cc1 - 10; cc += 8) { + rix[0] = qix[0] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + rix[2] = qix[2] + rr * cc1 + cc; + rix[3] = qix[3] + rr * cc1 + cc; + rix[4] = qix[4] + rr * cc1 + cc; + // horizontal + p1v = LC2VFU(rix[2][-4]); + p2v = LC2VFU(rix[2][-3]); + p3v = LC2VFU(rix[2][-2]); + p4v = LC2VFU(rix[2][-1]); + p5v = LC2VFU(rix[2][ 0]); + p6v = LC2VFU(rix[2][ 1]); + p7v = LC2VFU(rix[2][ 2]); + p8v = LC2VFU(rix[2][ 3]); + p9v = LC2VFU(rix[2][ 4]); + muv = (p1v + p2v + p3v + p4v + p5v + p6v + p7v + p8v + p9v) / ninev; + vxv = epsv + SQRV(p1v - muv) + SQRV(p2v - muv) + SQRV(p3v - muv) + SQRV(p4v - muv) + SQRV(p5v - muv) + SQRV(p6v - muv) + SQRV(p7v - muv) + SQRV(p8v - muv) + SQRV(p9v - muv); + p1v -= LC2VFU(rix[0][-4]); + p2v -= LC2VFU(rix[0][-3]); + p3v -= LC2VFU(rix[0][-2]); + p4v -= LC2VFU(rix[0][-1]); + p5v -= LC2VFU(rix[0][ 0]); + p6v -= LC2VFU(rix[0][ 1]); + p7v -= LC2VFU(rix[0][ 2]); + p8v -= LC2VFU(rix[0][ 3]); + p9v -= LC2VFU(rix[0][ 4]); + vnv = epsv + SQRV(p1v) + SQRV(p2v) + SQRV(p3v) + SQRV(p4v) + SQRV(p5v) + SQRV(p6v) + SQRV(p7v) + SQRV(p8v) + SQRV(p9v); + xhv = (LC2VFU(rix[0][0]) * vxv + LC2VFU(rix[2][0]) * vnv) / (vxv + vnv); + vhv = vxv * vnv / (vxv + vnv); + + // vertical + p1v = LC2VFU(rix[3][-w4]); + p2v = LC2VFU(rix[3][-w3]); + p3v = LC2VFU(rix[3][-w2]); + p4v = LC2VFU(rix[3][-w1]); + p5v = LC2VFU(rix[3][ 0]); + p6v = LC2VFU(rix[3][ w1]); + p7v = LC2VFU(rix[3][ w2]); + p8v = LC2VFU(rix[3][ w3]); + p9v = LC2VFU(rix[3][ w4]); + muv = (p1v + p2v + p3v + p4v + p5v + p6v + p7v + p8v + p9v) / ninev; + vxv = epsv + SQRV(p1v - muv) + SQRV(p2v - muv) + SQRV(p3v - muv) + SQRV(p4v - muv) + SQRV(p5v - muv) + SQRV(p6v - muv) + SQRV(p7v - muv) + SQRV(p8v - muv) + SQRV(p9v - muv); + p1v -= LC2VFU(rix[1][-w4]); + p2v -= LC2VFU(rix[1][-w3]); + p3v -= LC2VFU(rix[1][-w2]); + p4v -= LC2VFU(rix[1][-w1]); + p5v -= LC2VFU(rix[1][ 0]); + p6v -= LC2VFU(rix[1][ w1]); + p7v -= LC2VFU(rix[1][ w2]); + p8v -= LC2VFU(rix[1][ w3]); + p9v -= LC2VFU(rix[1][ w4]); + vnv = epsv + SQRV(p1v) + SQRV(p2v) + SQRV(p3v) + SQRV(p4v) + SQRV(p5v) + SQRV(p6v) + SQRV(p7v) + SQRV(p8v) + SQRV(p9v); + xvv = (LC2VFU(rix[1][0]) * vxv + LC2VFU(rix[3][0]) * vnv) / (vxv + vnv); + vvv = vxv * vnv / (vxv + vnv); + // interpolated G-R(B) + muv = (xhv * vvv + xvv * vhv) / (vhv + vvv); + STC2VFU(rix[4][0], muv); + } + +#endif + + for (; cc < cc1 - 4; cc += 2) { + rix[0] = qix[0] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + rix[2] = qix[2] + rr * cc1 + cc; + rix[3] = qix[3] + rr * cc1 + cc; + rix[4] = qix[4] + rr * cc1 + cc; + // horizontal + float p1 = rix[2][-4]; + float p2 = rix[2][-3]; + float p3 = rix[2][-2]; + float p4 = rix[2][-1]; + float p5 = rix[2][ 0]; + float p6 = rix[2][ 1]; + float p7 = rix[2][ 2]; + float p8 = rix[2][ 3]; + float p9 = rix[2][ 4]; + float mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.f; + float vx = 1e-7 + SQR(p1 - mu) + SQR(p2 - mu) + SQR(p3 - mu) + SQR(p4 - mu) + SQR(p5 - mu) + SQR(p6 - mu) + SQR(p7 - mu) + SQR(p8 - mu) + SQR(p9 - mu); + p1 -= rix[0][-4]; + p2 -= rix[0][-3]; + p3 -= rix[0][-2]; + p4 -= rix[0][-1]; + p5 -= rix[0][ 0]; + p6 -= rix[0][ 1]; + p7 -= rix[0][ 2]; + p8 -= rix[0][ 3]; + p9 -= rix[0][ 4]; + float vn = 1e-7 + SQR(p1) + SQR(p2) + SQR(p3) + SQR(p4) + SQR(p5) + SQR(p6) + SQR(p7) + SQR(p8) + SQR(p9); + float xh = (rix[0][0] * vx + rix[2][0] * vn) / (vx + vn); + float vh = vx * vn / (vx + vn); + + // vertical + p1 = rix[3][-w4]; + p2 = rix[3][-w3]; + p3 = rix[3][-w2]; + p4 = rix[3][-w1]; + p5 = rix[3][ 0]; + p6 = rix[3][ w1]; + p7 = rix[3][ w2]; + p8 = rix[3][ w3]; + p9 = rix[3][ w4]; + mu = (p1 + p2 + p3 + p4 + p5 + p6 + p7 + p8 + p9) / 9.f; + vx = 1e-7 + SQR(p1 - mu) + SQR(p2 - mu) + SQR(p3 - mu) + SQR(p4 - mu) + SQR(p5 - mu) + SQR(p6 - mu) + SQR(p7 - mu) + SQR(p8 - mu) + SQR(p9 - mu); + p1 -= rix[1][-w4]; + p2 -= rix[1][-w3]; + p3 -= rix[1][-w2]; + p4 -= rix[1][-w1]; + p5 -= rix[1][ 0]; + p6 -= rix[1][ w1]; + p7 -= rix[1][ w2]; + p8 -= rix[1][ w3]; + p9 -= rix[1][ w4]; + vn = 1e-7 + SQR(p1) + SQR(p2) + SQR(p3) + SQR(p4) + SQR(p5) + SQR(p6) + SQR(p7) + SQR(p8) + SQR(p9); + float xv = (rix[1][0] * vx + rix[3][0] * vn) / (vx + vn); + float vv = vx * vn / (vx + vn); + // interpolated G-R(B) + rix[4][0] = (xh * vv + xv * vh) / (vh + vv); + } + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.4); + } + } + + // copy CFA values +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rr = 0; rr < rr1; rr++) + for (int cc = 0, row = rr - ba; cc < cc1; cc++) { + int col = cc - ba; + int c = FC(rr, cc); + rix[c] = qix[c] + rr * cc1 + cc; + + if ((row >= 0) & (row < height) & (col >= 0) & (col < width)) { + rix[c][0] = (*gamtab)[rawData[row][col]]; + } else { + rix[c][0] = 0.f; + } + + if (c != 1) { + rix[1] = qix[1] + rr * cc1 + cc; + rix[4] = qix[4] + rr * cc1 + cc; + rix[1][0] = rix[c][0] + rix[4][0]; + } + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.5); + } + } + + // bilinear interpolation for R/B + // interpolate R/B at G location +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rr = 1; rr < rr1 - 1; rr++) + for (int cc = 1 + (FC(rr, 2) & 1), c = FC(rr, cc + 1); cc < cc1 - 1; cc += 2) { + rix[c] = qix[c] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + rix[c][0] = rix[1][0] + xdiv2f(rix[c][ -1] - rix[1][ -1] + rix[c][ 1] - rix[1][ 1]); + c = 2 - c; + rix[c] = qix[c] + rr * cc1 + cc; + rix[c][0] = rix[1][0] + xdiv2f(rix[c][-w1] - rix[1][-w1] + rix[c][w1] - rix[1][w1]); + c = 2 - c; + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.6); + } + } + + // interpolate R/B at B/R location +#ifdef _OPENMP + #pragma omp for +#endif + + for (int rr = 1; rr < rr1 - 1; rr++) + for (int cc = 1 + (FC(rr, 1) & 1), c = 2 - FC(rr, cc); cc < cc1 - 1; cc += 2) { + rix[c] = qix[c] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + rix[c][0] = rix[1][0] + 0.25f * (rix[c][-w1] - rix[1][-w1] + rix[c][ -1] - rix[1][ -1] + rix[c][ 1] - rix[1][ 1] + rix[c][ w1] - rix[1][ w1]); + } + +#ifdef _OPENMP + #pragma omp single +#endif + { + if (plistener) { + plistener->setProgress (0.7); + } + } + + }// End of parallelization 1 + + // median filter/ + for (int pass = 0; pass < iter; pass++) { + // Apply 3x3 median filter + // Compute median(R-G) and median(B-G) + +#ifdef _OPENMP + #pragma omp parallel for private(rix) +#endif + + for (int rr = 1; rr < rr1 - 1; rr++) { + for (int c = 0; c < 3; c += 2) { + int d = c + 3 - (c == 0 ? 0 : 1); + int cc = 1; +#ifdef __SSE2__ + + for (; cc < cc1 - 4; cc += 4) { + rix[d] = qix[d] + rr * cc1 + cc; + rix[c] = qix[c] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + // Assign 3x3 differential color values + const std::array p = { + LVFU(rix[c][-w1 - 1]) - LVFU(rix[1][-w1 - 1]), + LVFU(rix[c][-w1]) - LVFU(rix[1][-w1]), + LVFU(rix[c][-w1 + 1]) - LVFU(rix[1][-w1 + 1]), + LVFU(rix[c][ -1]) - LVFU(rix[1][ -1]), + LVFU(rix[c][ 0]) - LVFU(rix[1][ 0]), + LVFU(rix[c][ 1]) - LVFU(rix[1][ 1]), + LVFU(rix[c][ w1 - 1]) - LVFU(rix[1][ w1 - 1]), + LVFU(rix[c][ w1]) - LVFU(rix[1][ w1]), + LVFU(rix[c][ w1 + 1]) - LVFU(rix[1][ w1 + 1]) + }; + _mm_storeu_ps(&rix[d][0], median(p)); + } + +#endif + + for (; cc < cc1 - 1; cc++) { + rix[d] = qix[d] + rr * cc1 + cc; + rix[c] = qix[c] + rr * cc1 + cc; + rix[1] = qix[1] + rr * cc1 + cc; + // Assign 3x3 differential color values + const std::array p = { + rix[c][-w1 - 1] - rix[1][-w1 - 1], + rix[c][-w1] - rix[1][-w1], + rix[c][-w1 + 1] - rix[1][-w1 + 1], + rix[c][ -1] - rix[1][ -1], + rix[c][ 0] - rix[1][ 0], + rix[c][ 1] - rix[1][ 1], + rix[c][ w1 - 1] - rix[1][ w1 - 1], + rix[c][ w1] - rix[1][ w1], + rix[c][ w1 + 1] - rix[1][ w1 + 1] + }; + rix[d][0] = median(p); + } + } + } + + // red/blue at GREEN pixel locations & red/blue and green at BLUE/RED pixel locations +#ifdef _OPENMP + #pragma omp parallel for private (rix) +#endif + + for (int rr = 0; rr < rr1; rr++) { + rix[0] = qix[0] + rr * cc1; + rix[1] = qix[1] + rr * cc1; + rix[2] = qix[2] + rr * cc1; + rix[3] = qix[3] + rr * cc1; + rix[4] = qix[4] + rr * cc1; + int c0 = FC(rr, 0); + int c1 = FC(rr, 1); + + if (c0 == 1) { + c1 = 2 - c1; + int d = c1 + 3 - (c1 == 0 ? 0 : 1); + int cc; + + for (cc = 0; cc < cc1 - 1; cc += 2) { + rix[0][0] = rix[1][0] + rix[3][0]; + rix[2][0] = rix[1][0] + rix[4][0]; + rix[0]++; + rix[1]++; + rix[2]++; + rix[3]++; + rix[4]++; + rix[c1][0] = rix[1][0] + rix[d][0]; + rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); + rix[0]++; + rix[1]++; + rix[2]++; + rix[3]++; + rix[4]++; + } + + if (cc < cc1) { // remaining pixel, only if width is odd + rix[0][0] = rix[1][0] + rix[3][0]; + rix[2][0] = rix[1][0] + rix[4][0]; + } + } else { + c0 = 2 - c0; + int d = c0 + 3 - (c0 == 0 ? 0 : 1); + int cc; + + for (cc = 0; cc < cc1 - 1; cc += 2) { + rix[c0][0] = rix[1][0] + rix[d][0]; + rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); + rix[0]++; + rix[1]++; + rix[2]++; + rix[3]++; + rix[4]++; + rix[0][0] = rix[1][0] + rix[3][0]; + rix[2][0] = rix[1][0] + rix[4][0]; + rix[0]++; + rix[1]++; + rix[2]++; + rix[3]++; + rix[4]++; + } + + if (cc < cc1) { // remaining pixel, only if width is odd + rix[c0][0] = rix[1][0] + rix[d][0]; + rix[1][0] = 0.5f * (rix[0][0] - rix[3][0] + rix[2][0] - rix[4][0]); + } + } + } + } + + if (plistener) { + plistener->setProgress (0.8); + } + + if (applyGamma) { + gamtab = &(Color::igammatab_24_17); + } else { + gamtab->makeIdentity(); + } + + array2D* rgb[3]; + rgb[0] = &red; + rgb[1] = &green; + rgb[2] = &blue; + + // copy result back to image matrix +#ifdef _OPENMP + #pragma omp parallel for +#endif + + for (int row = 0; row < height; row++) { + for (int col = 0, rr = row + ba; col < width; col++) { + int cc = col + ba; + int c = FC(row, col); + + for (int ii = 0; ii < 3; ii++) + if (ii != c) { + float *rix = qix[ii] + rr * cc1 + cc; + (*(rgb[ii]))[row][col] = (*gamtab)[65535.f * rix[0]]; + } else { + (*(rgb[ii]))[row][col] = CLIP(rawData[row][col]); + } + } + } + + if (plistener) { + plistener->setProgress (1.0); + } + + if (buffer) { + free(buffer); + } else + for (int i = 0; i < 5; i++) { + free(qix[i]); + } + + if (!applyGamma) { + delete gamtab; + } + + if (iterations > 4) { + refinement(passref); + } + +} + +#ifdef __SSE2__ +#define CLIPV(a) vclampf(a,ZEROV,c65535v) +#endif +void RawImageSource::refinement(int PassCount) +{ + int width = W; + int height = H; + int w1 = width; + int w2 = 2 * w1; + + if (plistener) { + plistener->setProgressStr(M("TP_RAW_DMETHOD_PROGRESSBAR_REFINE")); + } + + array2D *rgb[3]; + rgb[0] = &red; + rgb[1] = &green; + rgb[2] = &blue; + + for (int b = 0; b < PassCount; b++) { + if (plistener) { + plistener->setProgress((float)b / PassCount); + } + + +#ifdef _OPENMP + #pragma omp parallel +#endif + { + float *pix[3]; + + /* Reinforce interpolated green pixels on RED/BLUE pixel locations */ +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 2; row < height - 2; row++) { + int col = 2 + (FC(row, 2) & 1); + int c = FC(row, col); +#ifdef __SSE2__ + vfloat dLv, dRv, dUv, dDv, v0v; + vfloat onev = F2V(1.f); + vfloat zd5v = F2V(0.5f); + vfloat c65535v = F2V(65535.f); + + for (; col < width - 8; col += 8) { + int indx = row * width + col; + pix[c] = (float*)(*rgb[c]) + indx; + pix[1] = (float*)(*rgb[1]) + indx; + dLv = onev / (onev + vabsf(LC2VFU(pix[c][ -2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); + dRv = onev / (onev + vabsf(LC2VFU(pix[c][ 2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); + dUv = onev / (onev + vabsf(LC2VFU(pix[c][-w2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); + dDv = onev / (onev + vabsf(LC2VFU(pix[c][ w2]) - LC2VFU(pix[c][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); + v0v = CLIPV(LC2VFU(pix[c][0]) + zd5v + ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); + STC2VFU(pix[1][0], v0v); + } + +#endif + + for (; col < width - 2; col += 2) { + int indx = row * width + col; + pix[c] = (float*)(*rgb[c]) + indx; + pix[1] = (float*)(*rgb[1]) + indx; + float dL = 1.f / (1.f + fabsf(pix[c][ -2] - pix[c][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); + float dR = 1.f / (1.f + fabsf(pix[c][ 2] - pix[c][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); + float dU = 1.f / (1.f + fabsf(pix[c][-w2] - pix[c][0]) + fabsf(pix[1][w1] - pix[1][-w1])); + float dD = 1.f / (1.f + fabsf(pix[c][ w2] - pix[c][0]) + fabsf(pix[1][w1] - pix[1][-w1])); + float v0 = (pix[c][0] + 0.5f + ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); + pix[1][0] = CLIP(v0); + } + } + + /* Reinforce interpolated red/blue pixels on GREEN pixel locations */ +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 2; row < height - 2; row++) { + int col = 2 + (FC(row, 3) & 1); + int c = FC(row, col + 1); +#ifdef __SSE2__ + vfloat dLv, dRv, dUv, dDv, v0v; + vfloat onev = F2V(1.f); + vfloat zd5v = F2V(0.5f); + vfloat c65535v = F2V(65535.f); + + for (; col < width - 8; col += 8) { + int indx = row * width + col; + pix[1] = (float*)(*rgb[1]) + indx; + + for (int i = 0; i < 2; c = 2 - c, i++) { + pix[c] = (float*)(*rgb[c]) + indx; + dLv = onev / (onev + vabsf(LC2VFU(pix[1][ -2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][ 1]) - LC2VFU(pix[c][ -1]))); + dRv = onev / (onev + vabsf(LC2VFU(pix[1][ 2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][ 1]) - LC2VFU(pix[c][ -1]))); + dUv = onev / (onev + vabsf(LC2VFU(pix[1][-w2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][w1]) - LC2VFU(pix[c][-w1]))); + dDv = onev / (onev + vabsf(LC2VFU(pix[1][ w2]) - LC2VFU(pix[1][0])) + vabsf(LC2VFU(pix[c][w1]) - LC2VFU(pix[c][-w1]))); + v0v = CLIPV(LC2VFU(pix[1][0]) + zd5v - ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); + STC2VFU(pix[c][0], v0v); + } + } + +#endif + + for (; col < width - 2; col += 2) { + int indx = row * width + col; + pix[1] = (float*)(*rgb[1]) + indx; + + for (int i = 0; i < 2; c = 2 - c, i++) { + pix[c] = (float*)(*rgb[c]) + indx; + float dL = 1.f / (1.f + fabsf(pix[1][ -2] - pix[1][0]) + fabsf(pix[c][ 1] - pix[c][ -1])); + float dR = 1.f / (1.f + fabsf(pix[1][ 2] - pix[1][0]) + fabsf(pix[c][ 1] - pix[c][ -1])); + float dU = 1.f / (1.f + fabsf(pix[1][-w2] - pix[1][0]) + fabsf(pix[c][w1] - pix[c][-w1])); + float dD = 1.f / (1.f + fabsf(pix[1][ w2] - pix[1][0]) + fabsf(pix[c][w1] - pix[c][-w1])); + float v0 = (pix[1][0] + 0.5f - ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); + pix[c][0] = CLIP(v0); + } + } + } + + /* Reinforce integrated red/blue pixels on BLUE/RED pixel locations */ +#ifdef _OPENMP + #pragma omp for +#endif + + for (int row = 2; row < height - 2; row++) { + int col = 2 + (FC(row, 2) & 1); + int c = 2 - FC(row, col); +#ifdef __SSE2__ + vfloat dLv, dRv, dUv, dDv, v0v; + vfloat onev = F2V(1.f); + vfloat zd5v = F2V(0.5f); + vfloat c65535v = F2V(65535.f); + + for (; col < width - 8; col += 8) { + int indx = row * width + col; + pix[0] = (float*)(*rgb[0]) + indx; + pix[1] = (float*)(*rgb[1]) + indx; + pix[2] = (float*)(*rgb[2]) + indx; + int d = 2 - c; + dLv = onev / (onev + vabsf(LC2VFU(pix[d][ -2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); + dRv = onev / (onev + vabsf(LC2VFU(pix[d][ 2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][ 1]) - LC2VFU(pix[1][ -1]))); + dUv = onev / (onev + vabsf(LC2VFU(pix[d][-w2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); + dDv = onev / (onev + vabsf(LC2VFU(pix[d][ w2]) - LC2VFU(pix[d][0])) + vabsf(LC2VFU(pix[1][w1]) - LC2VFU(pix[1][-w1]))); + v0v = CLIPV(LC2VFU(pix[1][0]) + zd5v - ((LC2VFU(pix[1][-1]) - LC2VFU(pix[c][-1])) * dLv + (LC2VFU(pix[1][1]) - LC2VFU(pix[c][1])) * dRv + (LC2VFU(pix[1][-w1]) - LC2VFU(pix[c][-w1])) * dUv + (LC2VFU(pix[1][w1]) - LC2VFU(pix[c][w1])) * dDv ) / (dLv + dRv + dUv + dDv)); + STC2VFU(pix[c][0], v0v); + } + +#endif + + for (; col < width - 2; col += 2) { + int indx = row * width + col; + pix[0] = (float*)(*rgb[0]) + indx; + pix[1] = (float*)(*rgb[1]) + indx; + pix[2] = (float*)(*rgb[2]) + indx; + int d = 2 - c; + float dL = 1.f / (1.f + fabsf(pix[d][ -2] - pix[d][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); + float dR = 1.f / (1.f + fabsf(pix[d][ 2] - pix[d][0]) + fabsf(pix[1][ 1] - pix[1][ -1])); + float dU = 1.f / (1.f + fabsf(pix[d][-w2] - pix[d][0]) + fabsf(pix[1][w1] - pix[1][-w1])); + float dD = 1.f / (1.f + fabsf(pix[d][ w2] - pix[d][0]) + fabsf(pix[1][w1] - pix[1][-w1])); + float v0 = (pix[1][0] + 0.5f - ((pix[1][ -1] - pix[c][ -1]) * dL + (pix[1][ 1] - pix[c][ 1]) * dR + (pix[1][-w1] - pix[c][-w1]) * dU + (pix[1][ w1] - pix[c][ w1]) * dD ) / (dL + dR + dU + dD)); + pix[c][0] = CLIP(v0); + } + } + } // end parallel + } +} +#ifdef __SSE2__ +#undef CLIPV +#endif + +} /* namespace */ diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index f7d905357..28cf30010 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -129,7 +129,6 @@ public: void flushRawData () override; void flushRGB () override; void HLRecovery_Global (const procparams::ToneCurveParams &hrp) override; - void refinement_lassus (int PassCount); void refinement(int PassCount); void setBorder(unsigned int rawBorder) override {border = rawBorder;} bool isRGBSourceModified() const override @@ -267,7 +266,7 @@ protected: void hphd_demosaic(); void vng4_demosaic(const array2D &rawData, array2D &red, array2D &green, array2D &blue); void igv_interpolate(int winw, int winh); - void lmmse_interpolate_omp(int winw, int winh, array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations); + void lmmse_interpolate_omp(int winw, int winh, const array2D &rawData, array2D &red, array2D &green, array2D &blue, int iterations); void amaze_demosaic_RT(int winx, int winy, int winw, int winh, const array2D &rawData, array2D &red, array2D &green, array2D &blue, size_t chunkSize = 1, bool measure = false);//Emil's code for AMaZE void dual_demosaic_RT(bool isBayer, const procparams::RAWParams &raw, int winw, int winh, const array2D &rawData, array2D &red, array2D &green, array2D &blue, double &contrast, bool autoContrast = false); void fast_demosaic();//Emil's code for fast demosaicing