From f94cf7869675355ef1de18addc69c0b34bc4e444 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 10 Jan 2021 21:33:00 +0100 Subject: [PATCH 1/4] Improvement for rcd demosaic, #6054 --- rtengine/rcd_demosaic.cc | 149 +++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 76 deletions(-) diff --git a/rtengine/rcd_demosaic.cc b/rtengine/rcd_demosaic.cc index c986dd601..7f76ddcdb 100644 --- a/rtengine/rcd_demosaic.cc +++ b/rtengine/rcd_demosaic.cc @@ -21,7 +21,6 @@ #include "rawimagesource.h" #include "rt_math.h" #include "../rtgui/multilangmgr.h" -#include "opthelper.h" #include "StopWatch.h" using namespace std; @@ -75,9 +74,10 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } const unsigned int cfarray[2][2] = {{FC(0,0), FC(0,1)}, {FC(1,0), FC(1,1)}}; - constexpr int rcdBorder = 9; - constexpr int tileSize = 214; - constexpr int tileSizeN = tileSize - 2 * rcdBorder; + constexpr int rcdBorder = 6; + constexpr int tileBorder = 9; + constexpr int tileSize = 140; + constexpr int tileSizeN = tileSize - 2 * tileBorder; const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0); const int numTw = W / (tileSizeN) + ((W % (tileSizeN)) ? 1 : 0); constexpr int w1 = tileSize, w2 = 2 * tileSize, w3 = 3 * tileSize, w4 = 4 * tileSize; @@ -96,6 +96,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) float *const VH_Dir = (float*) calloc(tileSize * tileSize, sizeof *VH_Dir); float *const PQ_Dir = (float*) calloc(tileSize * tileSize / 2, sizeof *PQ_Dir); float *const lpf = PQ_Dir; // reuse buffer, they don't overlap in usage + float *const P_CDiff_Hpf = (float*) calloc(tileSize * tileSize / 2, sizeof *P_CDiff_Hpf); + float *const Q_CDiff_Hpf = (float*) calloc(tileSize * tileSize / 2, sizeof *Q_CDiff_Hpf); #ifdef _OPENMP #pragma omp for schedule(dynamic, chunkSize) collapse(2) nowait @@ -104,12 +106,12 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) for (int tc = 0; tc < numTw; ++tc) { const int rowStart = tr * tileSizeN; const int rowEnd = std::min(rowStart + tileSize, H); - if (rowStart + rcdBorder == rowEnd - rcdBorder) { + if (rowStart + tileBorder == rowEnd - tileBorder) { continue; } const int colStart = tc * tileSizeN; const int colEnd = std::min(colStart + tileSize, W); - if (colStart + rcdBorder == colEnd - rcdBorder) { + if (colStart + tileBorder == colEnd - tileBorder) { continue; } @@ -134,15 +136,38 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) * STEP 1: Find cardinal and diagonal interpolation directions */ - for (int row = 4; row < tileRows - 4; row++) { - for (int col = 4, indx = row * tileSize + col; col < tilecols - 4; col++, indx++) { - const float cfai = cfa[indx]; - //Calculate h/v local discrimination - float V_Stat = std::max(epssq, -18.f * cfai * (cfa[indx - w1] + cfa[indx + w1] + 2.f * (cfa[indx - w2] + cfa[indx + w2]) - cfa[indx - w3] - cfa[indx + w3]) - 2.f * cfai * (cfa[indx - w4] + cfa[indx + w4] - 19.f * cfai) - cfa[indx - w1] * (70.f * cfa[indx + w1] + 12.f * cfa[indx - w2] - 24.f * cfa[indx + w2] + 38.f * cfa[indx - w3] - 16.f * cfa[indx + w3] - 12.f * cfa[indx - w4] + 6.f * cfa[indx + w4] - 46.f * cfa[indx - w1]) + cfa[indx + w1] * (24.f * cfa[indx - w2] - 12.f * cfa[indx + w2] + 16.f * cfa[indx - w3] - 38.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 12.f * cfa[indx + w4] + 46.f * cfa[indx + w1]) + cfa[indx - w2] * (14.f * cfa[indx + w2] - 12.f * cfa[indx + w3] - 2.f * cfa[indx - w4] + 2.f * cfa[indx + w4] + 11.f * cfa[indx - w2]) + cfa[indx + w2] * (-12.f * cfa[indx - w3] + 2.f * (cfa[indx - w4] - cfa[indx + w4]) + 11.f * cfa[indx + w2]) + cfa[indx - w3] * (2.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 10.f * cfa[indx - w3]) + cfa[indx + w3] * (-6.f * cfa[indx + w4] + 10.f * cfa[indx + w3]) + cfa[indx - w4] * cfa[indx - w4] + cfa[indx + w4] * cfa[indx + w4]); - float H_Stat = std::max(epssq, -18.f * cfai * (cfa[indx - 1] + cfa[indx + 1] + 2.f * (cfa[indx - 2] + cfa[indx + 2]) - cfa[indx - 3] - cfa[indx + 3]) - 2.f * cfai * (cfa[indx - 4] + cfa[indx + 4] - 19.f * cfai) - cfa[indx - 1] * (70.f * cfa[indx + 1] + 12.f * cfa[indx - 2] - 24.f * cfa[indx + 2] + 38.f * cfa[indx - 3] - 16.f * cfa[indx + 3] - 12.f * cfa[indx - 4] + 6.f * cfa[indx + 4] - 46.f * cfa[indx - 1]) + cfa[indx + 1] * (24.f * cfa[indx - 2] - 12.f * cfa[indx + 2] + 16.f * cfa[indx - 3] - 38.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 12.f * cfa[indx + 4] + 46.f * cfa[indx + 1]) + cfa[indx - 2] * (14.f * cfa[indx + 2] - 12.f * cfa[indx + 3] - 2.f * cfa[indx - 4] + 2.f * cfa[indx + 4] + 11.f * cfa[indx - 2]) + cfa[indx + 2] * (-12.f * cfa[indx - 3] + 2.f * (cfa[indx - 4] - cfa[indx + 4]) + 11.f * cfa[indx + 2]) + cfa[indx - 3] * (2.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 10.f * cfa[indx - 3]) + cfa[indx + 3] * (-6.f * cfa[indx + 4] + 10.f * cfa[indx + 3]) + cfa[indx - 4] * cfa[indx - 4] + cfa[indx + 4] * cfa[indx + 4]); + float bufferV[3][tileSize - 8]; + + // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter + for (int row = 3; row < std::min(tileRows - 3, 5); ++row) { + for (int col = 4, indx = row * tileSize + col; col < tilecols - 4; ++col, ++indx) { + bufferV[row - 3][col - 4] = SQR((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.f * (cfa[indx - w2] + cfa[indx + w2]) + 6.f * cfa[indx]); + } + } + + // Step 1.2: Obtain the vertical and horizontal directional discrimination strength + + float bufferH[tileSize - 6] ALIGNED16; + float* V0 = bufferV[0]; + float* V1 = bufferV[1]; + float* V2 = bufferV[2]; + for (int row = 4; row < tileRows - 4; ++row) { + for (int col = 3, indx = row * tileSize + col; col < tilecols - 3; ++col, ++indx) { + bufferH[col - 3] = SQR((cfa[indx - 3] - cfa[indx - 1] - cfa[indx + 1] + cfa[indx + 3]) - 3.f * (cfa[indx - 2] + cfa[indx + 2]) + 6.f * cfa[indx]); + } + for (int col = 4, indx = (row + 1) * tileSize + col; col < tilecols - 4; ++col, ++indx) { + V2[col - 4] = SQR((cfa[indx - w3] - cfa[indx - w1] - cfa[indx + w1] + cfa[indx + w3]) - 3.f * (cfa[indx - w2] + cfa[indx + w2]) + 6.f * cfa[indx]); + } + for (int col = 4, indx = row * tileSize + col; col < tilecols - 4; ++col, ++indx) { + + float V_Stat = std::max(epssq, V0[col - 4] + V1[col - 4] + V2[col - 4]); + float H_Stat = std::max(epssq, bufferH[col - 4] + bufferH[col - 3] + bufferH[col - 2]); VH_Dir[indx] = V_Stat / (V_Stat + H_Stat); } + // rotate pointers from row0, row1, row2 to row1, row2, row0 + std::swap(V0, V2); + std::swap(V0, V1); } /** @@ -150,11 +175,11 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) */ // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data - for (int row = 2; row < tileRows - 2; row++) { + for (int row = 2; row < tileRows - 2; ++row) { for (int col = 2 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, lpindx = indx / 2; col < tilecols - 2; col += 2, indx += 2, ++lpindx) { - lpf[lpindx] = 0.25f * cfa[indx] + - 0.125f * (cfa[indx - w1] + cfa[indx + w1] + cfa[indx - 1] + cfa[indx + 1]) + - 0.0625f * (cfa[indx - w1 - 1] + cfa[indx - w1 + 1] + cfa[indx + w1 - 1] + cfa[indx + w1 + 1]); + lpf[lpindx] = cfa[indx] + + 0.5f * (cfa[indx - w1] + cfa[indx + w1] + cfa[indx - 1] + cfa[indx + 1]) + + 0.25f * (cfa[indx - w1 - 1] + cfa[indx - w1 + 1] + cfa[indx + w1 - 1] + cfa[indx + w1 + 1]); } } @@ -162,48 +187,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) * STEP 3: Populate the green channel */ // Step 3.1: Populate the green channel at blue and red CFA positions - for (int row = 4; row < tileRows - 4; row++) { - int col = 4 + (fc(cfarray, row, 0) & 1); - int indx = row * tileSize + col; - int lpindx = indx / 2; -#ifdef __SSE2__ - const vfloat zd5v = F2V(0.5f); - const vfloat zd25v = F2V(0.25f); - const vfloat epsv = F2V(eps); - for (; col < tilecols - 7; col += 8, indx += 8, lpindx += 4) { - // Cardinal gradients - const vfloat cfai = LC2VFU(cfa[indx]); - const vfloat N_Grad = epsv + (vabsf(LC2VFU(cfa[indx - w1]) - LC2VFU(cfa[indx + w1])) + vabsf(cfai - LC2VFU(cfa[indx - w2]))) + (vabsf(LC2VFU(cfa[indx - w1]) - LC2VFU(cfa[indx - w3])) + vabsf(LC2VFU(cfa[indx - w2]) - LC2VFU(cfa[indx - w4]))); - const vfloat S_Grad = epsv + (vabsf(LC2VFU(cfa[indx - w1]) - LC2VFU(cfa[indx + w1])) + vabsf(cfai - LC2VFU(cfa[indx + w2]))) + (vabsf(LC2VFU(cfa[indx + w1]) - LC2VFU(cfa[indx + w3])) + vabsf(LC2VFU(cfa[indx + w2]) - LC2VFU(cfa[indx + w4]))); - const vfloat W_Grad = epsv + (vabsf(LC2VFU(cfa[indx - 1]) - LC2VFU(cfa[indx + 1])) + vabsf(cfai - LC2VFU(cfa[indx - 2]))) + (vabsf(LC2VFU(cfa[indx - 1]) - LC2VFU(cfa[indx - 3])) + vabsf(LC2VFU(cfa[indx - 2]) - LC2VFU(cfa[indx - 4]))); - const vfloat E_Grad = epsv + (vabsf(LC2VFU(cfa[indx - 1]) - LC2VFU(cfa[indx + 1])) + vabsf(cfai - LC2VFU(cfa[indx + 2]))) + (vabsf(LC2VFU(cfa[indx + 1]) - LC2VFU(cfa[indx + 3])) + vabsf(LC2VFU(cfa[indx + 2]) - LC2VFU(cfa[indx + 4]))); - - // Cardinal pixel estimations - const vfloat lpfi = LVFU(lpf[lpindx]); - const vfloat N_Est = LC2VFU(cfa[indx - w1]) + (LC2VFU(cfa[indx - w1]) * (lpfi - LVFU(lpf[lpindx - w1])) / (epsv + lpfi + LVFU(lpf[lpindx - w1]))); - const vfloat S_Est = LC2VFU(cfa[indx + w1]) + (LC2VFU(cfa[indx + w1]) * (lpfi - LVFU(lpf[lpindx + w1])) / (epsv + lpfi + LVFU(lpf[lpindx + w1]))); - const vfloat W_Est = LC2VFU(cfa[indx - 1]) + (LC2VFU(cfa[indx - 1]) * (lpfi - LVFU(lpf[lpindx - 1])) / (epsv + lpfi + LVFU(lpf[lpindx - 1]))); - const vfloat E_Est = LC2VFU(cfa[indx + 1]) + (LC2VFU(cfa[indx + 1]) * (lpfi - LVFU(lpf[lpindx + 1])) / (epsv + lpfi + LVFU(lpf[lpindx + 1]))); - - // Vertical and horizontal estimations - const vfloat V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad); - const vfloat H_Est = (W_Grad * E_Est + E_Grad * W_Est) / (E_Grad + W_Grad); - - // G@B and G@R interpolation - // Refined vertical and horizontal local discrimination - const vfloat VH_Central_Value = LC2VFU(VH_Dir[indx]); - const vfloat VH_Neighbourhood_Value = zd25v * ((LC2VFU(VH_Dir[indx - w1 - 1]) + LC2VFU(VH_Dir[indx - w1 + 1])) + (LC2VFU(VH_Dir[indx + w1 - 1]) + LC2VFU(VH_Dir[indx + w1 + 1]))); - -#if defined(__clang__) - const vfloat VH_Disc = vself(vmaskf_lt(vabsf(zd5v - VH_Central_Value), vabsf(zd5v - VH_Neighbourhood_Value)), VH_Neighbourhood_Value, VH_Central_Value); -#else - const vfloat VH_Disc = vabsf(zd5v - VH_Central_Value) < vabsf(zd5v - VH_Neighbourhood_Value) ? VH_Neighbourhood_Value : VH_Central_Value; -#endif - const vfloat result = vintpf(VH_Disc, H_Est, V_Est); - STC2VFU(rgb[1][indx], result); - } -#endif - for (; col < tilecols - 4; col += 2, indx += 2, ++lpindx) { + for (int row = 4; row < tileRows - 4; ++row) { + for (int col = 4 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, lpindx = indx / 2; col < tilecols - 4; col += 2, indx += 2, ++lpindx) { // Cardinal gradients const float cfai = cfa[indx]; const float N_Grad = eps + (std::fabs(cfa[indx - w1] - cfa[indx + w1]) + std::fabs(cfai - cfa[indx - w2])) + (std::fabs(cfa[indx - w1] - cfa[indx - w3]) + std::fabs(cfa[indx - w2] - cfa[indx - w4])); @@ -213,10 +198,10 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) // Cardinal pixel estimations const float lpfi = lpf[lpindx]; - const float N_Est = cfa[indx - w1] * (1.f + (lpfi - lpf[lpindx - w1]) / (eps + lpfi + lpf[lpindx - w1])); - const float S_Est = cfa[indx + w1] * (1.f + (lpfi - lpf[lpindx + w1]) / (eps + lpfi + lpf[lpindx + w1])); - const float W_Est = cfa[indx - 1] * (1.f + (lpfi - lpf[lpindx - 1]) / (eps + lpfi + lpf[lpindx - 1])); - const float E_Est = cfa[indx + 1] * (1.f + (lpfi - lpf[lpindx + 1]) / (eps + lpfi + lpf[lpindx + 1])); + const float N_Est = cfa[indx - w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - w1]); + const float S_Est = cfa[indx + w1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + w1]); + const float W_Est = cfa[indx - 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx - 1]); + const float E_Est = cfa[indx + 1] * (lpfi + lpfi) / (eps + lpfi + lpf[lpindx + 1]); // Vertical and horizontal estimations const float V_Est = (S_Grad * N_Est + N_Grad * S_Est) / (N_Grad + S_Grad); @@ -236,21 +221,26 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) * STEP 4: Populate the red and blue channels */ - // Step 4.1: Calculate P/Q diagonal local discrimination - for (int row = rcdBorder - 4; row < tileRows - rcdBorder + 4; row++) { - for (int col = rcdBorder - 4 + (fc(cfarray, row, rcdBorder) & 1), indx = row * tileSize + col, pqindx = indx / 2; col < tilecols - rcdBorder + 4; col += 2, indx += 2, ++pqindx) { - const float cfai = cfa[indx]; + // Step 4.0: Calculate the square of the P/Q diagonals color difference high pass filter + for (int row = 3; row < tileRows - 3; ++row) { + for (int col = 3, indx = row * tileSize + col, indx2 = indx / 2; col < tilecols - 3; col+=2, indx+=2, indx2++ ) { + P_CDiff_Hpf[indx2] = SQR((cfa[indx - w3 - 3] - cfa[indx - w1 - 1] - cfa[indx + w1 + 1] + cfa[indx + w3 + 3]) - 3.f * (cfa[indx - w2 - 2] + cfa[indx + w2 + 2]) + 6.f * cfa[indx]); + Q_CDiff_Hpf[indx2] = SQR((cfa[indx - w3 + 3] - cfa[indx - w1 + 1] - cfa[indx + w1 - 1] + cfa[indx + w3 - 3]) - 3.f * (cfa[indx - w2 + 2] + cfa[indx + w2 - 2]) + 6.f * cfa[indx]); + } + } - float P_Stat = std::max(epssq, - 18.f * cfai * (cfa[indx - w1 - 1] + cfa[indx + w1 + 1] + 2.f * (cfa[indx - w2 - 2] + cfa[indx + w2 + 2]) - cfa[indx - w3 - 3] - cfa[indx + w3 + 3]) - 2.f * cfai * (cfa[indx - w4 - 4] + cfa[indx + w4 + 4] - 19.f * cfai) - cfa[indx - w1 - 1] * (70.f * cfa[indx + w1 + 1] - 12.f * cfa[indx - w2 - 2] + 24.f * cfa[indx + w2 + 2] - 38.f * cfa[indx - w3 - 3] + 16.f * cfa[indx + w3 + 3] + 12.f * cfa[indx - w4 - 4] - 6.f * cfa[indx + w4 + 4] + 46.f * cfa[indx - w1 - 1]) + cfa[indx + w1 + 1] * (24.f * cfa[indx - w2 - 2] - 12.f * cfa[indx + w2 + 2] + 16.f * cfa[indx - w3 - 3] - 38.f * cfa[indx + w3 + 3] - 6.f * cfa[indx - w4 - 4] + 12.f * cfa[indx + w4 + 4] + 46.f * cfa[indx + w1 + 1]) + cfa[indx - w2 - 2] * (14.f * cfa[indx + w2 + 2] - 12.f * cfa[indx + w3 + 3] - 2.f * (cfa[indx - w4 - 4] - cfa[indx + w4 + 4]) + 11.f * cfa[indx - w2 - 2]) - cfa[indx + w2 + 2] * (12.f * cfa[indx - w3 - 3] + 2.f * (cfa[indx - w4 - 4] - cfa[indx + w4 + 4]) + 11.f * cfa[indx + w2 + 2]) + cfa[indx - w3 - 3] * (2.f * cfa[indx + w3 + 3] - 6.f * cfa[indx - w4 - 4] + 10.f * cfa[indx - w3 - 3]) - cfa[indx + w3 + 3] * (6.f * cfa[indx + w4 + 4] + 10.f * cfa[indx + w3 + 3]) + cfa[indx - w4 - 4] * cfa[indx - w4 - 4] + cfa[indx + w4 + 4] * cfa[indx + w4 + 4]); - float Q_Stat = std::max(epssq, - 18.f * cfai * (cfa[indx + w1 - 1] + cfa[indx - w1 + 1] + 2.f * (cfa[indx + w2 - 2] + cfa[indx - w2 + 2]) - cfa[indx + w3 - 3] - cfa[indx - w3 + 3]) - 2.f * cfai * (cfa[indx + w4 - 4] + cfa[indx - w4 + 4] - 19.f * cfai) - cfa[indx + w1 - 1] * (70.f * cfa[indx - w1 + 1] - 12.f * cfa[indx + w2 - 2] + 24.f * cfa[indx - w2 + 2] - 38.f * cfa[indx + w3 - 3] + 16.f * cfa[indx - w3 + 3] + 12.f * cfa[indx + w4 - 4] - 6.f * cfa[indx - w4 + 4] + 46.f * cfa[indx + w1 - 1]) + cfa[indx - w1 + 1] * (24.f * cfa[indx + w2 - 2] - 12.f * cfa[indx - w2 + 2] + 16.f * cfa[indx + w3 - 3] - 38.f * cfa[indx - w3 + 3] - 6.f * cfa[indx + w4 - 4] + 12.f * cfa[indx - w4 + 4] + 46.f * cfa[indx - w1 + 1]) + cfa[indx + w2 - 2] * (14.f * cfa[indx - w2 + 2] - 12.f * cfa[indx - w3 + 3] - 2.f * (cfa[indx + w4 - 4] - cfa[indx - w4 + 4]) + 11.f * cfa[indx + w2 - 2]) - cfa[indx - w2 + 2] * (12.f * cfa[indx + w3 - 3] + 2.f * (cfa[indx + w4 - 4] - cfa[indx - w4 + 4]) + 11.f * cfa[indx - w2 + 2]) + cfa[indx + w3 - 3] * (2.f * cfa[indx - w3 + 3] - 6.f * cfa[indx + w4 - 4] + 10.f * cfa[indx + w3 - 3]) - cfa[indx - w3 + 3] * (6.f * cfa[indx - w4 + 4] + 10.f * cfa[indx - w3 + 3]) + cfa[indx + w4 - 4] * cfa[indx + w4 - 4] + cfa[indx - w4 + 4] * cfa[indx - w4 + 4]); - - PQ_Dir[pqindx] = P_Stat / (P_Stat + Q_Stat); + // Step 4.1: Obtain the P/Q diagonals directional discrimination strength + for (int row = 4; row < tileRows - 4; ++row) { + for (int col = 4 + FC(row, 0) & 1, indx = row * tileSize + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tilecols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ ) { + float P_Stat = std::max(epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]); + float Q_Stat = std::max(epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]); + PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat); } } // Step 4.2: Populate the red and blue channels at blue and red CFA positions - for (int row = rcdBorder - 3; row < tileRows - rcdBorder + 3; row++) { - for (int col = rcdBorder - 3 + (fc(cfarray, row, rcdBorder - 1) & 1), indx = row * tileSize + col, c = 2 - fc(cfarray, row, col), pqindx = indx / 2, pqindx2 = (indx - w1 - 1) / 2, pqindx3 = (indx + w1 - 1) / 2; col < tilecols - rcdBorder + 3; col += 2, indx += 2, ++pqindx, ++pqindx2, ++pqindx3) { + for (int row = 4; row < tileRows - 4; ++row) { + for (int col = 4 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, c = 2 - fc(cfarray, row, col), pqindx = indx / 2, pqindx2 = (indx - w1 - 1) / 2, pqindx3 = (indx + w1 - 1) / 2; col < tilecols - 4; col += 2, indx += 2, ++pqindx, ++pqindx2, ++pqindx3) { // Refined P/Q diagonal local discrimination float PQ_Central_Value = PQ_Dir[pqindx]; @@ -280,8 +270,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } // Step 4.3: Populate the red and blue channels at green CFA positions - for (int row = rcdBorder; row < tileRows - rcdBorder; row++) { - for (int col = rcdBorder + (fc(cfarray, row, rcdBorder - 1) & 1), indx = row * tileSize + col; col < tilecols - rcdBorder; col += 2, indx += 2) { + for (int row = 4; row < tileRows - 4; ++row) { + for (int col = 4 + (fc(cfarray, row, 1) & 1), indx = row * tileSize + col; col < tilecols - 4; col += 2, indx += 2) { // Refined vertical and horizontal local discrimination float VH_Central_Value = VH_Dir[indx]; @@ -323,8 +313,13 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } } - for (int row = rowStart + rcdBorder; row < rowEnd - rcdBorder; ++row) { - for (int col = colStart + rcdBorder; col < colEnd - rcdBorder; ++col) { + // For the outermost tiles in all directions we can use a smaller border margin + const int last_vertical = rowEnd - ((tr == numTh - 1) ? rcdBorder : tileBorder); + const int first_horizontal = colStart + ((tc == 0) ? rcdBorder : tileBorder); + const int last_horizontal = colEnd - ((tc == numTw - 1) ? rcdBorder : tileBorder); + for(int row = rowStart + ((tr == 0) ? rcdBorder : tileBorder); row < last_vertical; row++) { +// for (int row = rowStart + tileBorder; row < rowEnd - tileBorder; ++row) { + for (int col = first_horizontal; col < last_horizontal; ++col) { int idx = (row - rowStart) * tileSize + col - colStart ; red[row][col] = std::max(0.f, rgb[0][idx] * scale); green[row][col] = std::max(0.f, rgb[1][idx] * scale); @@ -352,6 +347,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) free(rgb); free(VH_Dir); free(PQ_Dir); + free(P_CDiff_Hpf); + free(Q_CDiff_Hpf); } border_interpolate(W, H, rcdBorder, rawData, red, green, blue); From 59b7eb0bbd13606f61c7495752a13acf5838c9f4 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Thu, 14 Jan 2021 19:12:49 +0100 Subject: [PATCH 2/4] rcd: small speedup and cleaner code, #6054 --- rtengine/rcd_demosaic.cc | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/rtengine/rcd_demosaic.cc b/rtengine/rcd_demosaic.cc index 7f76ddcdb..b97b19bb9 100644 --- a/rtengine/rcd_demosaic.cc +++ b/rtengine/rcd_demosaic.cc @@ -76,7 +76,7 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) const unsigned int cfarray[2][2] = {{FC(0,0), FC(0,1)}, {FC(1,0), FC(1,1)}}; constexpr int rcdBorder = 6; constexpr int tileBorder = 9; - constexpr int tileSize = 140; + constexpr int tileSize = 194; constexpr int tileSizeN = tileSize - 2 * tileBorder; const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0); const int numTw = W / (tileSizeN) + ((W % (tileSizeN)) ? 1 : 0); @@ -119,23 +119,15 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) const int tilecols = std::min(colEnd - colStart, tileSize); for (int row = rowStart; row < rowEnd; row++) { - int indx = (row - rowStart) * tileSize; const int c0 = fc(cfarray, row, colStart); const int c1 = fc(cfarray, row, colStart + 1); - int col = colStart; - for (; col < colEnd - 1; col+=2, indx+=2) { - cfa[indx] = rgb[c0][indx] = LIM01(rawData[row][col] / scale); - cfa[indx + 1] = rgb[c1][indx + 1] = LIM01(rawData[row][col + 1] / scale); - } - if (col < colEnd) { - cfa[indx] = rgb[c0][indx] = LIM01(rawData[row][col] / scale); + for (int col = colStart, indx = (row - rowStart) * tileSize; col < colEnd; ++col, ++indx) { + cfa[indx] = rgb[c0][indx] = rgb[c1][indx] = LIM01(rawData[row][col] / scale); } } - /** * STEP 1: Find cardinal and diagonal interpolation directions */ - float bufferV[3][tileSize - 8]; // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter @@ -314,12 +306,12 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } // For the outermost tiles in all directions we can use a smaller border margin - const int last_vertical = rowEnd - ((tr == numTh - 1) ? rcdBorder : tileBorder); - const int first_horizontal = colStart + ((tc == 0) ? rcdBorder : tileBorder); - const int last_horizontal = colEnd - ((tc == numTw - 1) ? rcdBorder : tileBorder); - for(int row = rowStart + ((tr == 0) ? rcdBorder : tileBorder); row < last_vertical; row++) { -// for (int row = rowStart + tileBorder; row < rowEnd - tileBorder; ++row) { - for (int col = first_horizontal; col < last_horizontal; ++col) { + const int firstVertical = rowStart + ((tr == 0) ? rcdBorder : tileBorder); + const int lastVertical = rowEnd - ((tr == numTh - 1) ? rcdBorder : tileBorder); + const int firstHorizontal = colStart + ((tc == 0) ? rcdBorder : tileBorder); + const int lastHorizontal = colEnd - ((tc == numTw - 1) ? rcdBorder : tileBorder); + for (int row = firstVertical; row < lastVertical; ++row) { + for (int col = firstHorizontal; col < lastHorizontal; ++col) { int idx = (row - rowStart) * tileSize + col - colStart ; red[row][col] = std::max(0.f, rgb[0][idx] * scale); green[row][col] = std::max(0.f, rgb[1][idx] * scale); From 369707df7a20f1fc3ec1e61b780f4594a2cce41b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 16 Jan 2021 23:32:24 +0100 Subject: [PATCH 3/4] rcd demosaic: added some comments, #6054 --- rtengine/rcd_demosaic.cc | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/rtengine/rcd_demosaic.cc b/rtengine/rcd_demosaic.cc index b97b19bb9..d582c23dd 100644 --- a/rtengine/rcd_demosaic.cc +++ b/rtengine/rcd_demosaic.cc @@ -35,7 +35,7 @@ unsigned fc(const unsigned int cfa[2][2], int r, int c) { namespace rtengine { -/** +/* * RATIO CORRECTED DEMOSAICING * Luis Sanz Rodriguez (luis.sanz.rodriguez(at)gmail(dot)com) * @@ -44,7 +44,12 @@ namespace rtengine * Original code from https://github.com/LuisSR/RCD-Demosaicing * Licensed under the GNU GPL version 3 */ + // Tiled version by Ingo Weyrich (heckflosse67@gmx.de) +// Luis Sanz Rodriguez significantly optimised the v 2.3 code and simplified the directional +// coefficients in an exact, shorter and more performant formula. +// In cooperation with Hanno Schwalm (hanno@schwalm-bremen.de) and Luis Sanz Rodriguez this has been tuned for performance. + void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) { // Test for RGB cfa @@ -60,7 +65,6 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } std::unique_ptr stop; - if (measure) { std::cout << "Demosaicing " << W << "x" << H << " image using rcd with " << chunkSize << " tiles per thread" << std::endl; stop.reset(new StopWatch("rcd demosaic")); @@ -74,8 +78,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } const unsigned int cfarray[2][2] = {{FC(0,0), FC(0,1)}, {FC(1,0), FC(1,1)}}; - constexpr int rcdBorder = 6; - constexpr int tileBorder = 9; + constexpr int tileBorder = 9; // avoid tile-overlap errors + constexpr int rcdBorder = 6; // for the outermost tiles we can have a smaller outer border constexpr int tileSize = 194; constexpr int tileSizeN = tileSize - 2 * tileBorder; const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0); @@ -125,9 +129,8 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) cfa[indx] = rgb[c0][indx] = rgb[c1][indx] = LIM01(rawData[row][col] / scale); } } - /** - * STEP 1: Find cardinal and diagonal interpolation directions - */ + + // Step 1: Find cardinal and diagonal interpolation directions float bufferV[3][tileSize - 8]; // Step 1.1: Calculate the square of the vertical and horizontal color difference high pass filter @@ -138,7 +141,6 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } // Step 1.2: Obtain the vertical and horizontal directional discrimination strength - float bufferH[tileSize - 6] ALIGNED16; float* V0 = bufferV[0]; float* V1 = bufferV[1]; @@ -162,11 +164,7 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) std::swap(V0, V1); } - /** - * STEP 2: Calculate the low pass filter - */ - // Step 2.1: Low pass filter incorporating green, red and blue local samples from the raw data - + // Step 2: Low pass filter incorporating green, red and blue local samples from the raw data for (int row = 2; row < tileRows - 2; ++row) { for (int col = 2 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, lpindx = indx / 2; col < tilecols - 2; col += 2, indx += 2, ++lpindx) { lpf[lpindx] = cfa[indx] + @@ -175,10 +173,7 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) } } - /** - * STEP 3: Populate the green channel - */ - // Step 3.1: Populate the green channel at blue and red CFA positions + // Step 3: Populate the green channel at blue and red CFA positions for (int row = 4; row < tileRows - 4; ++row) { for (int col = 4 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, lpindx = indx / 2; col < tilecols - 4; col += 2, indx += 2, ++lpindx) { // Cardinal gradients From d607028871579eb4bf14cf93bb8726679dd5933b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sun, 17 Jan 2021 15:13:21 +0100 Subject: [PATCH 4/4] rcd demosaic: small change --- rtengine/rcd_demosaic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/rcd_demosaic.cc b/rtengine/rcd_demosaic.cc index d582c23dd..b12ce1473 100644 --- a/rtengine/rcd_demosaic.cc +++ b/rtengine/rcd_demosaic.cc @@ -218,7 +218,7 @@ void RawImageSource::rcd_demosaic(size_t chunkSize, bool measure) // Step 4.1: Obtain the P/Q diagonals directional discrimination strength for (int row = 4; row < tileRows - 4; ++row) { - for (int col = 4 + FC(row, 0) & 1, indx = row * tileSize + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tilecols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ ) { + for (int col = 4 + (fc(cfarray, row, 0) & 1), indx = row * tileSize + col, indx2 = indx / 2, indx3 = (indx - w1 - 1) / 2, indx4 = (indx + w1 - 1) / 2; col < tilecols - 4; col += 2, indx += 2, indx2++, indx3++, indx4++ ) { float P_Stat = std::max(epssq, P_CDiff_Hpf[indx3] + P_CDiff_Hpf[indx2] + P_CDiff_Hpf[indx4 + 1]); float Q_Stat = std::max(epssq, Q_CDiff_Hpf[indx3 + 1] + Q_CDiff_Hpf[indx2] + Q_CDiff_Hpf[indx4]); PQ_Dir[indx2] = P_Stat / (P_Stat + Q_Stat);