From a4acc1dca353700ee2960b82cb72d22a9eaf08d9 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Wed, 30 Jan 2019 22:56:44 +0100 Subject: [PATCH 1/5] Speedup for hphd demosaic --- rtengine/demosaic_algos.cc | 241 +++++++++++++++++++------------------ 1 file changed, 126 insertions(+), 115 deletions(-) diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index aa763f01f..5acf87489 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -37,7 +37,7 @@ #include "sleef.c" #include "opthelper.h" #include "median.h" -//#define BENCHMARK +#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP #include @@ -62,88 +62,100 @@ extern const Settings* settings; void RawImageSource::hphd_vertical (float** hpmap, int col_from, int col_to) { - float* temp = new float[max(W, H)]; - float* avg = new float[max(W, H)]; - float* dev = new float[max(W, H)]; +// BENCHFUN + constexpr int numCols = 8; + JaggedArray temp(numCols, H, true); + JaggedArray avg(numCols, H, true); + JaggedArray dev(numCols, H, true); - memset (temp, 0, max(W, H)*sizeof(float)); - memset (avg, 0, max(W, H)*sizeof(float)); - memset (dev, 0, max(W, H)*sizeof(float)); - - for (int k = col_from; k < col_to; k++) { + int k = col_from; + for (; k < col_to - 7; k += numCols) { for (int i = 5; i < H - 5; i++) { - temp[i] = (rawData[i - 5][k] - 8 * rawData[i - 4][k] + 27 * rawData[i - 3][k] - 48 * rawData[i - 2][k] + 42 * rawData[i - 1][k] - - (rawData[i + 5][k] - 8 * rawData[i + 4][k] + 27 * rawData[i + 3][k] - 48 * rawData[i + 2][k] + 42 * rawData[i + 1][k])) / 100.0; - temp[i] = ABS(temp[i]); + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i - 1][k + h])); + } } for (int j = 4; j < H - 4; j++) { - float avgL = (temp[j - 4] + temp[j - 3] + temp[j - 2] + temp[j - 1] + temp[j] + temp[j + 1] + temp[j + 2] + temp[j + 3] + temp[j + 4]) / 9.0; - avg[j] = avgL; - float devL = ((temp[j - 4] - avgL) * (temp[j - 4] - avgL) + (temp[j - 3] - avgL) * (temp[j - 3] - avgL) + (temp[j - 2] - avgL) * (temp[j - 2] - avgL) + (temp[j - 1] - avgL) * (temp[j - 1] - avgL) + (temp[j] - avgL) * (temp[j] - avgL) + (temp[j + 1] - avgL) * (temp[j + 1] - avgL) + (temp[j + 2] - avgL) * (temp[j + 2] - avgL) + (temp[j + 3] - avgL) * (temp[j + 3] - avgL) + (temp[j + 4] - avgL) * (temp[j + 4] - avgL)) / 9.0; - - if (devL < 0.001) { - devL = 0.001; + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + const float avgL = (temp[j - 4][h] + temp[j - 3][h] + temp[j - 2][h] + temp[j - 1][h] + temp[j][h] + temp[j + 1][h] + temp[j + 2][h] + temp[j + 3][h] + temp[j + 4][h]) / 9.f; + avg[j][h] = avgL; + dev[j][h] = std::max(0.001f, SQR(temp[j - 4][h] - avgL) + SQR(temp[j - 3][h] - avgL) + SQR(temp[j - 2][h] - avgL) + SQR(temp[j - 1][h] - avgL) + SQR(temp[j][h] - avgL) + SQR(temp[j + 1][h] - avgL) + SQR(temp[j + 2][h] - avgL) + SQR(temp[j + 3][h] - avgL) + SQR(temp[j + 4][h] - avgL)); } - - dev[j] = devL; } for (int j = 5; j < H - 5; j++) { - float avgL = avg[j - 1]; - float avgR = avg[j + 1]; - float devL = dev[j - 1]; - float devR = dev[j + 1]; + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + const float avgL = avg[j - 1][h]; + const float avgR = avg[j + 1][h]; + const float devL = dev[j - 1][h]; + const float devR = dev[j + 1][h]; + hpmap[j][k + h] = avgL + (avgR - avgL) * devL / (devL + devR); + } + } + } + for (; k < col_to; k++) { + for (int i = 5; i < H - 5; i++) { + temp[i][0] = std::fabs((rawData[i - 5][k] - rawData[i + 5][k]) - 8 * (rawData[i - 4][k] - rawData[i + 4][k]) + 27 * (rawData[i - 3][k] - rawData[i + 3][k]) - 48 * (rawData[i - 2][k] - rawData[i + 2][k]) + 42 * (rawData[i - 1][k] -rawData[i + 1][k])); + } + + for (int j = 4; j < H - 4; j++) { + const float avgL = (temp[j - 4][0] + temp[j - 3][0] + temp[j - 2][0] + temp[j - 1][0] + temp[j][0] + temp[j + 1][0] + temp[j + 2][0] + temp[j + 3][0] + temp[j + 4][0]) / 9.f; + avg[j][0] = avgL; + dev[j][0] = std::max(0.001f, SQR(temp[j - 4][0] - avgL) + SQR(temp[j - 3][0] - avgL) + SQR(temp[j - 2][0] - avgL) + SQR(temp[j - 1][0] - avgL) + SQR(temp[j][0] - avgL) + SQR(temp[j + 1][0] - avgL) + SQR(temp[j + 2][0] - avgL) + SQR(temp[j + 3][0] - avgL) + SQR(temp[j + 4][0] - avgL)); + } + + for (int j = 5; j < H - 5; j++) { + const float avgL = avg[j - 1][0]; + const float avgR = avg[j + 1][0]; + const float devL = dev[j - 1][0]; + const float devR = dev[j + 1][0]; hpmap[j][k] = avgL + (avgR - avgL) * devL / (devL + devR); } } - delete [] temp; - delete [] avg; - delete [] dev; } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% void RawImageSource::hphd_horizontal (float** hpmap, int row_from, int row_to) { - float* temp = new float[max(W, H)]; - float* avg = new float[max(W, H)]; - float* dev = new float[max(W, H)]; +// BENCHFUN + float* temp = new float[W]; + float* avg = new float[W]; + float* dev = new float[W]; - memset (temp, 0, max(W, H)*sizeof(float)); - memset (avg, 0, max(W, H)*sizeof(float)); - memset (dev, 0, max(W, H)*sizeof(float)); + memset(temp, 0, W * sizeof(float)); + memset(avg, 0, W * sizeof(float)); + memset(dev, 0, W * sizeof(float)); for (int i = row_from; i < row_to; i++) { + #pragma omp simd for (int j = 5; j < W - 5; j++) { - temp[j] = (rawData[i][j - 5] - 8 * rawData[i][j - 4] + 27 * rawData[i][j - 3] - 48 * rawData[i][j - 2] + 42 * rawData[i][j - 1] - - (rawData[i][j + 5] - 8 * rawData[i][j + 4] + 27 * rawData[i][j + 3] - 48 * rawData[i][j + 2] + 42 * rawData[i][j + 1])) / 100; - temp[j] = ABS(temp[j]); + temp[j] = std::fabs((rawData[i][j - 5] - rawData[i][j + 5]) - 8 * (rawData[i][j - 4] - rawData[i][j + 4]) + 27 * (rawData[i][j - 3] - rawData[i][j + 3]) - 48 * (rawData[i][j - 2] - rawData[i][j + 2]) + 42 * (rawData[i][j - 1] - rawData[i][j + 1])); } + #pragma omp simd for (int j = 4; j < W - 4; j++) { - float avgL = (temp[j - 4] + temp[j - 3] + temp[j - 2] + temp[j - 1] + temp[j] + temp[j + 1] + temp[j + 2] + temp[j + 3] + temp[j + 4]) / 9.0; + const float avgL = ((temp[j - 4] + temp[j - 3]) + (temp[j - 2] + temp[j - 1]) + (temp[j] + temp[j + 1]) + (temp[j + 2] + temp[j + 3]) + temp[j + 4]) / 9.f; avg[j] = avgL; - float devL = ((temp[j - 4] - avgL) * (temp[j - 4] - avgL) + (temp[j - 3] - avgL) * (temp[j - 3] - avgL) + (temp[j - 2] - avgL) * (temp[j - 2] - avgL) + (temp[j - 1] - avgL) * (temp[j - 1] - avgL) + (temp[j] - avgL) * (temp[j] - avgL) + (temp[j + 1] - avgL) * (temp[j + 1] - avgL) + (temp[j + 2] - avgL) * (temp[j + 2] - avgL) + (temp[j + 3] - avgL) * (temp[j + 3] - avgL) + (temp[j + 4] - avgL) * (temp[j + 4] - avgL)) / 9.0; - - if (devL < 0.001) { - devL = 0.001; - } - - dev[j] = devL; + dev[j] = std::max(0.001f, SQR(temp[j - 4] - avgL) + SQR(temp[j - 3] - avgL) + SQR(temp[j - 2] - avgL) + SQR(temp[j - 1] - avgL) + SQR(temp[j] - avgL) + SQR(temp[j + 1] - avgL) + SQR(temp[j + 2] - avgL) + SQR(temp[j + 3] - avgL) + SQR(temp[j + 4] - avgL)); } + #pragma omp simd for (int j = 5; j < W - 5; j++) { - float avgL = avg[j - 1]; - float avgR = avg[j + 1]; - float devL = dev[j - 1]; - float devR = dev[j + 1]; - float hpv = avgL + (avgR - avgL) * devL / (devL + devR); + const float avgL = avg[j - 1]; + const float avgR = avg[j + 1]; + const float devL = dev[j - 1]; + const float devR = dev[j + 1]; + const float hpv = avgL + (avgR - avgL) * devL / (devL + devR); - if (hpmap[i][j] < 0.8 * hpv) { + if (hpmap[i][j] < 0.8f * hpv) { hpmap[i][j] = 2; - } else if (hpv < 0.8 * hpmap[i][j]) { + } else if (hpv < 0.8f * hpmap[i][j]) { hpmap[i][j] = 1; } else { hpmap[i][j] = 0; @@ -160,8 +172,11 @@ void RawImageSource::hphd_horizontal (float** hpmap, int row_from, int row_to) void RawImageSource::hphd_green (float** hpmap) { +// BENCHFUN + + constexpr float eps = 0.001f; #ifdef _OPENMP - #pragma omp parallel for + #pragma omp parallel for schedule(dynamic, 16) #endif for (int i = 3; i < H - 3; i++) { @@ -170,81 +185,81 @@ void RawImageSource::hphd_green (float** hpmap) green[i][j] = rawData[i][j]; } else { if (hpmap[i][j] == 1) { - int g2 = rawData[i][j + 1] + ((rawData[i][j] - rawData[i][j + 2]) / 2); - int g4 = rawData[i][j - 1] + ((rawData[i][j] - rawData[i][j - 2]) / 2); + const float g2 = rawData[i][j + 1] + (rawData[i][j] - rawData[i][j + 2]) * 0.5f; + const float g4 = rawData[i][j - 1] + (rawData[i][j] - rawData[i][j - 2]) * 0.5f; - int dx = rawData[i][j + 1] - rawData[i][j - 1]; - int d1 = rawData[i][j + 3] - rawData[i][j + 1]; - int d2 = rawData[i][j + 2] - rawData[i][j]; - int d3 = (rawData[i - 1][j + 2] - rawData[i - 1][j]) / 2; - int d4 = (rawData[i + 1][j + 2] - rawData[i + 1][j]) / 2; + const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); + float d1 = rawData[i][j + 3] - rawData[i][j + 1]; + float d2 = rawData[i][j + 2] - rawData[i][j]; + float d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; + float d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; - double e2 = 1.0 / (1.0 + ABS(dx) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e2 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); d1 = rawData[i][j - 3] - rawData[i][j - 1]; d2 = rawData[i][j - 2] - rawData[i][j]; - d3 = (rawData[i - 1][j - 2] - rawData[i - 1][j]) / 2; - d4 = (rawData[i + 1][j - 2] - rawData[i + 1][j]) / 2; + d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; - double e4 = 1.0 / (1.0 + ABS(dx) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e4 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); green[i][j] = (e2 * g2 + e4 * g4) / (e2 + e4); } else if (hpmap[i][j] == 2) { - int g1 = rawData[i - 1][j] + ((rawData[i][j] - rawData[i - 2][j]) / 2); - int g3 = rawData[i + 1][j] + ((rawData[i][j] - rawData[i + 2][j]) / 2); + const float g1 = rawData[i - 1][j] + (rawData[i][j] - rawData[i - 2][j]) * 0.5f; + const float g3 = rawData[i + 1][j] + (rawData[i][j] - rawData[i + 2][j]) * 0.5f; - int dy = rawData[i + 1][j] - rawData[i - 1][j]; - int d1 = rawData[i - 1][j] - rawData[i - 3][j]; - int d2 = rawData[i][j] - rawData[i - 2][j]; - int d3 = (rawData[i][j - 1] - rawData[i - 2][j - 1]) / 2; - int d4 = (rawData[i][j + 1] - rawData[i - 2][j + 1]) / 2; + const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); + float d1 = rawData[i - 1][j] - rawData[i - 3][j]; + float d2 = rawData[i][j] - rawData[i - 2][j]; + float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; + float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; - double e1 = 1.0 / (1.0 + ABS(dy) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e1 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); d1 = rawData[i + 1][j] - rawData[i + 3][j]; d2 = rawData[i][j] - rawData[i + 2][j]; - d3 = (rawData[i][j - 1] - rawData[i + 2][j - 1]) / 2; - d4 = (rawData[i][j + 1] - rawData[i + 2][j + 1]) / 2; + d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; + d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; - double e3 = 1.0 / (1.0 + ABS(dy) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e3 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); green[i][j] = (e1 * g1 + e3 * g3) / (e1 + e3); } else { - int g1 = rawData[i - 1][j] + ((rawData[i][j] - rawData[i - 2][j]) / 2); - int g2 = rawData[i][j + 1] + ((rawData[i][j] - rawData[i][j + 2]) / 2); - int g3 = rawData[i + 1][j] + ((rawData[i][j] - rawData[i + 2][j]) / 2); - int g4 = rawData[i][j - 1] + ((rawData[i][j] - rawData[i][j - 2]) / 2); + const float g1 = rawData[i - 1][j] + (rawData[i][j] - rawData[i - 2][j]) * 0.5f; + const float g2 = rawData[i][j + 1] + (rawData[i][j] - rawData[i][j + 2]) * 0.5f; + const float g3 = rawData[i + 1][j] + (rawData[i][j] - rawData[i + 2][j]) * 0.5f; + const float g4 = rawData[i][j - 1] + (rawData[i][j] - rawData[i][j - 2]) * 0.5f; - int dx = rawData[i][j + 1] - rawData[i][j - 1]; - int dy = rawData[i + 1][j] - rawData[i - 1][j]; + const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); + const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); - int d1 = rawData[i - 1][j] - rawData[i - 3][j]; - int d2 = rawData[i][j] - rawData[i - 2][j]; - int d3 = (rawData[i][j - 1] - rawData[i - 2][j - 1]) / 2; - int d4 = (rawData[i][j + 1] - rawData[i - 2][j + 1]) / 2; + float d1 = rawData[i - 1][j] - rawData[i - 3][j]; + float d2 = rawData[i][j] - rawData[i - 2][j]; + float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; + float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; - double e1 = 1.0 / (1.0 + ABS(dy) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e1 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); d1 = rawData[i][j + 3] - rawData[i][j + 1]; d2 = rawData[i][j + 2] - rawData[i][j]; - d3 = (rawData[i - 1][j + 2] - rawData[i - 1][j]) / 2; - d4 = (rawData[i + 1][j + 2] - rawData[i + 1][j]) / 2; + d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; - double e2 = 1.0 / (1.0 + ABS(dx) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e2 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); d1 = rawData[i + 1][j] - rawData[i + 3][j]; d2 = rawData[i][j] - rawData[i + 2][j]; - d3 = (rawData[i][j - 1] - rawData[i + 2][j - 1]) / 2; - d4 = (rawData[i][j + 1] - rawData[i + 2][j + 1]) / 2; + d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; + d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; - double e3 = 1.0 / (1.0 + ABS(dy) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e3 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); d1 = rawData[i][j - 3] - rawData[i][j - 1]; d2 = rawData[i][j - 2] - rawData[i][j]; - d3 = (rawData[i - 1][j - 2] - rawData[i - 1][j]) / 2; - d4 = (rawData[i + 1][j - 2] - rawData[i + 1][j]) / 2; + d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; - double e4 = 1.0 / (1.0 + ABS(dx) + ABS(d1) + ABS(d2) + ABS(d3) + ABS(d4)); + const float e4 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); green[i][j] = (e1 * g1 + e2 * g2 + e3 * g3 + e4 * g4) / (e1 + e2 + e3 + e4); } @@ -257,12 +272,13 @@ void RawImageSource::hphd_green (float** hpmap) void RawImageSource::hphd_demosaic () { + BENCHFUN if (plistener) { - plistener->setProgressStr (Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::HPHD))); - plistener->setProgress (0.0); + plistener->setProgressStr(Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::HPHD))); + plistener->setProgress(0.0); } - JaggedArray hpmap (W, H, true); + JaggedArray hpmap(W, H, true); #ifdef _OPENMP #pragma omp parallel @@ -272,17 +288,17 @@ void RawImageSource::hphd_demosaic () int blk = W / nthreads; if (tid < nthreads - 1) { - hphd_vertical (hpmap, tid * blk, (tid + 1)*blk); + hphd_vertical(hpmap, tid * blk, (tid + 1)*blk); } else { - hphd_vertical (hpmap, tid * blk, W); + hphd_vertical(hpmap, tid * blk, W); } } #else - hphd_vertical (hpmap, 0, W); + hphd_vertical(hpmap, 0, W); #endif if (plistener) { - plistener->setProgress (0.33); + plistener->setProgress(0.33); } #ifdef _OPENMP @@ -293,35 +309,30 @@ void RawImageSource::hphd_demosaic () int blk = H / nthreads; if (tid < nthreads - 1) { - hphd_horizontal (hpmap, tid * blk, (tid + 1)*blk); + hphd_horizontal(hpmap, tid * blk, (tid + 1)*blk); } else { - hphd_horizontal (hpmap, tid * blk, H); + hphd_horizontal(hpmap, tid * blk, H); } } #else - hphd_horizontal (hpmap, 0, H); + hphd_horizontal(hpmap, 0, H); #endif - hphd_green (hpmap); + hphd_green(hpmap); if (plistener) { - plistener->setProgress (0.66); + plistener->setProgress(0.66); } - for (int i = 0; i < H; i++) { - if (i == 0) { - interpolate_row_rb_mul_pp (rawData, red[i], blue[i], nullptr, green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); - } else if (i == H - 1) { - interpolate_row_rb_mul_pp (rawData, red[i], blue[i], green[i - 1], green[i], nullptr, i, 1.0, 1.0, 1.0, 0, W, 1); - } else { - interpolate_row_rb_mul_pp (rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); - } + #pragma omp parallel for + for (int i = 4; i < H - 4; i++) { + interpolate_row_rb_mul_pp(rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); } border_interpolate2(W, H, 4, rawData, red, green, blue); if (plistener) { - plistener->setProgress (1.0); + plistener->setProgress(1.0); } } From ac152ac423a779f0c9b4406527140ea71cc4f062 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 31 Jan 2019 22:20:23 +0100 Subject: [PATCH 2/5] hphd demosaic: small speedup and own compilation unit, #5159 --- rtengine/CMakeLists.txt | 1 + rtengine/demosaic_algos.cc | 283 ---------------------------- rtengine/hphd_demosaic_RT.cc | 352 +++++++++++++++++++++++++++++++++++ rtengine/rawimagesource.h | 3 - 4 files changed, 353 insertions(+), 286 deletions(-) create mode 100644 rtengine/hphd_demosaic_RT.cc diff --git a/rtengine/CMakeLists.txt b/rtengine/CMakeLists.txt index 4ffad24a1..cf3d8c8b6 100644 --- a/rtengine/CMakeLists.txt +++ b/rtengine/CMakeLists.txt @@ -63,6 +63,7 @@ set(RTENGINESOURCEFILES gauss.cc green_equil_RT.cc hilite_recon.cc + hphd_demosaic_RT.cc iccjpeg.cc iccstore.cc icons.cc diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index 5acf87489..a4d896eec 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -20,16 +20,9 @@ #include #include "rawimagesource.h" -#include "rawimagesource_i.h" -#include "jaggedarray.h" #include "rawimage.h" #include "mytime.h" -#include "iccmatrices.h" -#include "iccstore.h" #include "image8.h" -#include "curves.h" -#include "dfmanager.h" -#include "slicer.h" #include "rt_math.h" #include "color.h" #include "../rtgui/multilangmgr.h" @@ -60,282 +53,6 @@ extern const Settings* settings; #define x00625(a) xdivf(a, 4) #define x0125(a) xdivf(a, 3) -void RawImageSource::hphd_vertical (float** hpmap, int col_from, int col_to) -{ -// BENCHFUN - constexpr int numCols = 8; - JaggedArray temp(numCols, H, true); - JaggedArray avg(numCols, H, true); - JaggedArray dev(numCols, H, true); - - int k = col_from; - for (; k < col_to - 7; k += numCols) { - for (int i = 5; i < H - 5; i++) { - #pragma omp simd - for(int h = 0; h < numCols; ++h) { - temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i - 1][k + h])); - } - } - - for (int j = 4; j < H - 4; j++) { - #pragma omp simd - for(int h = 0; h < numCols; ++h) { - const float avgL = (temp[j - 4][h] + temp[j - 3][h] + temp[j - 2][h] + temp[j - 1][h] + temp[j][h] + temp[j + 1][h] + temp[j + 2][h] + temp[j + 3][h] + temp[j + 4][h]) / 9.f; - avg[j][h] = avgL; - dev[j][h] = std::max(0.001f, SQR(temp[j - 4][h] - avgL) + SQR(temp[j - 3][h] - avgL) + SQR(temp[j - 2][h] - avgL) + SQR(temp[j - 1][h] - avgL) + SQR(temp[j][h] - avgL) + SQR(temp[j + 1][h] - avgL) + SQR(temp[j + 2][h] - avgL) + SQR(temp[j + 3][h] - avgL) + SQR(temp[j + 4][h] - avgL)); - } - } - - for (int j = 5; j < H - 5; j++) { - #pragma omp simd - for(int h = 0; h < numCols; ++h) { - const float avgL = avg[j - 1][h]; - const float avgR = avg[j + 1][h]; - const float devL = dev[j - 1][h]; - const float devR = dev[j + 1][h]; - hpmap[j][k + h] = avgL + (avgR - avgL) * devL / (devL + devR); - } - } - } - for (; k < col_to; k++) { - for (int i = 5; i < H - 5; i++) { - temp[i][0] = std::fabs((rawData[i - 5][k] - rawData[i + 5][k]) - 8 * (rawData[i - 4][k] - rawData[i + 4][k]) + 27 * (rawData[i - 3][k] - rawData[i + 3][k]) - 48 * (rawData[i - 2][k] - rawData[i + 2][k]) + 42 * (rawData[i - 1][k] -rawData[i + 1][k])); - } - - for (int j = 4; j < H - 4; j++) { - const float avgL = (temp[j - 4][0] + temp[j - 3][0] + temp[j - 2][0] + temp[j - 1][0] + temp[j][0] + temp[j + 1][0] + temp[j + 2][0] + temp[j + 3][0] + temp[j + 4][0]) / 9.f; - avg[j][0] = avgL; - dev[j][0] = std::max(0.001f, SQR(temp[j - 4][0] - avgL) + SQR(temp[j - 3][0] - avgL) + SQR(temp[j - 2][0] - avgL) + SQR(temp[j - 1][0] - avgL) + SQR(temp[j][0] - avgL) + SQR(temp[j + 1][0] - avgL) + SQR(temp[j + 2][0] - avgL) + SQR(temp[j + 3][0] - avgL) + SQR(temp[j + 4][0] - avgL)); - } - - for (int j = 5; j < H - 5; j++) { - const float avgL = avg[j - 1][0]; - const float avgR = avg[j + 1][0]; - const float devL = dev[j - 1][0]; - const float devR = dev[j + 1][0]; - hpmap[j][k] = avgL + (avgR - avgL) * devL / (devL + devR); - } - } - -} - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -void RawImageSource::hphd_horizontal (float** hpmap, int row_from, int row_to) -{ -// BENCHFUN - float* temp = new float[W]; - float* avg = new float[W]; - float* dev = new float[W]; - - memset(temp, 0, W * sizeof(float)); - memset(avg, 0, W * sizeof(float)); - memset(dev, 0, W * sizeof(float)); - - for (int i = row_from; i < row_to; i++) { - #pragma omp simd - for (int j = 5; j < W - 5; j++) { - temp[j] = std::fabs((rawData[i][j - 5] - rawData[i][j + 5]) - 8 * (rawData[i][j - 4] - rawData[i][j + 4]) + 27 * (rawData[i][j - 3] - rawData[i][j + 3]) - 48 * (rawData[i][j - 2] - rawData[i][j + 2]) + 42 * (rawData[i][j - 1] - rawData[i][j + 1])); - } - - #pragma omp simd - for (int j = 4; j < W - 4; j++) { - const float avgL = ((temp[j - 4] + temp[j - 3]) + (temp[j - 2] + temp[j - 1]) + (temp[j] + temp[j + 1]) + (temp[j + 2] + temp[j + 3]) + temp[j + 4]) / 9.f; - avg[j] = avgL; - dev[j] = std::max(0.001f, SQR(temp[j - 4] - avgL) + SQR(temp[j - 3] - avgL) + SQR(temp[j - 2] - avgL) + SQR(temp[j - 1] - avgL) + SQR(temp[j] - avgL) + SQR(temp[j + 1] - avgL) + SQR(temp[j + 2] - avgL) + SQR(temp[j + 3] - avgL) + SQR(temp[j + 4] - avgL)); - } - - #pragma omp simd - for (int j = 5; j < W - 5; j++) { - const float avgL = avg[j - 1]; - const float avgR = avg[j + 1]; - const float devL = dev[j - 1]; - const float devR = dev[j + 1]; - const float hpv = avgL + (avgR - avgL) * devL / (devL + devR); - - if (hpmap[i][j] < 0.8f * hpv) { - hpmap[i][j] = 2; - } else if (hpv < 0.8f * hpmap[i][j]) { - hpmap[i][j] = 1; - } else { - hpmap[i][j] = 0; - } - } - } - - delete [] temp; - delete [] avg; - delete [] dev; -} - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -void RawImageSource::hphd_green (float** hpmap) -{ -// BENCHFUN - - constexpr float eps = 0.001f; -#ifdef _OPENMP - #pragma omp parallel for schedule(dynamic, 16) -#endif - - for (int i = 3; i < H - 3; i++) { - for (int j = 3; j < W - 3; j++) { - if (ri->ISGREEN(i, j)) { - green[i][j] = rawData[i][j]; - } else { - if (hpmap[i][j] == 1) { - const float g2 = rawData[i][j + 1] + (rawData[i][j] - rawData[i][j + 2]) * 0.5f; - const float g4 = rawData[i][j - 1] + (rawData[i][j] - rawData[i][j - 2]) * 0.5f; - - const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); - float d1 = rawData[i][j + 3] - rawData[i][j + 1]; - float d2 = rawData[i][j + 2] - rawData[i][j]; - float d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; - float d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; - - const float e2 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - d1 = rawData[i][j - 3] - rawData[i][j - 1]; - d2 = rawData[i][j - 2] - rawData[i][j]; - d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; - d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; - - const float e4 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - green[i][j] = (e2 * g2 + e4 * g4) / (e2 + e4); - } else if (hpmap[i][j] == 2) { - const float g1 = rawData[i - 1][j] + (rawData[i][j] - rawData[i - 2][j]) * 0.5f; - const float g3 = rawData[i + 1][j] + (rawData[i][j] - rawData[i + 2][j]) * 0.5f; - - const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); - float d1 = rawData[i - 1][j] - rawData[i - 3][j]; - float d2 = rawData[i][j] - rawData[i - 2][j]; - float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; - float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; - - const float e1 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - d1 = rawData[i + 1][j] - rawData[i + 3][j]; - d2 = rawData[i][j] - rawData[i + 2][j]; - d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; - d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; - - const float e3 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - green[i][j] = (e1 * g1 + e3 * g3) / (e1 + e3); - } else { - const float g1 = rawData[i - 1][j] + (rawData[i][j] - rawData[i - 2][j]) * 0.5f; - const float g2 = rawData[i][j + 1] + (rawData[i][j] - rawData[i][j + 2]) * 0.5f; - const float g3 = rawData[i + 1][j] + (rawData[i][j] - rawData[i + 2][j]) * 0.5f; - const float g4 = rawData[i][j - 1] + (rawData[i][j] - rawData[i][j - 2]) * 0.5f; - - const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); - const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); - - float d1 = rawData[i - 1][j] - rawData[i - 3][j]; - float d2 = rawData[i][j] - rawData[i - 2][j]; - float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; - float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; - - const float e1 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - d1 = rawData[i][j + 3] - rawData[i][j + 1]; - d2 = rawData[i][j + 2] - rawData[i][j]; - d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; - d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; - - const float e2 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - d1 = rawData[i + 1][j] - rawData[i + 3][j]; - d2 = rawData[i][j] - rawData[i + 2][j]; - d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; - d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; - - const float e3 = 1.f / (dy + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - d1 = rawData[i][j - 3] - rawData[i][j - 1]; - d2 = rawData[i][j - 2] - rawData[i][j]; - d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; - d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; - - const float e4 = 1.f / (dx + std::fabs(d1) + std::fabs(d2) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); - - green[i][j] = (e1 * g1 + e2 * g2 + e3 * g3 + e4 * g4) / (e1 + e2 + e3 + e4); - } - } - } - } -} - -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -void RawImageSource::hphd_demosaic () -{ - BENCHFUN - if (plistener) { - plistener->setProgressStr(Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::HPHD))); - plistener->setProgress(0.0); - } - - JaggedArray hpmap(W, H, true); - -#ifdef _OPENMP - #pragma omp parallel - { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int blk = W / nthreads; - - if (tid < nthreads - 1) { - hphd_vertical(hpmap, tid * blk, (tid + 1)*blk); - } else { - hphd_vertical(hpmap, tid * blk, W); - } - } -#else - hphd_vertical(hpmap, 0, W); -#endif - - if (plistener) { - plistener->setProgress(0.33); - } - -#ifdef _OPENMP - #pragma omp parallel - { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int blk = H / nthreads; - - if (tid < nthreads - 1) { - hphd_horizontal(hpmap, tid * blk, (tid + 1)*blk); - } else { - hphd_horizontal(hpmap, tid * blk, H); - } - } -#else - hphd_horizontal(hpmap, 0, H); -#endif - - hphd_green(hpmap); - - if (plistener) { - plistener->setProgress(0.66); - } - - #pragma omp parallel for - for (int i = 4; i < H - 4; i++) { - interpolate_row_rb_mul_pp(rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); - } - - border_interpolate2(W, H, 4, rawData, red, green, blue); - - if (plistener) { - plistener->setProgress(1.0); - } -} - #undef fc #define fc(row,col) \ (ri->get_filters() >> ((((row) << 1 & 14) + ((col) & 1)) << 1) & 3) diff --git a/rtengine/hphd_demosaic_RT.cc b/rtengine/hphd_demosaic_RT.cc new file mode 100644 index 000000000..d5a21a4d0 --- /dev/null +++ b/rtengine/hphd_demosaic_RT.cc @@ -0,0 +1,352 @@ +/* + * This file is part of RawTherapee. + * + * Copyright (c) 2004-2019 Gabor Horvath + * + * RawTherapee is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * RawTherapee is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with RawTherapee. If not, see . + */ +#include + +#include "rawimagesource.h" +#include "rawimagesource_i.h" +#include "jaggedarray.h" +#include "rawimage.h" +#include "rt_math.h" +#include "../rtgui/multilangmgr.h" +#include "opthelper.h" +#define BENCHMARK +#include "StopWatch.h" +#ifdef _OPENMP +#include +#endif + +using namespace std; +using namespace rtengine; + +namespace { + +void hphd_vertical(const array2D &rawData, float** hpmap, int col_from, int col_to, int H) +{ + + // process 'numCols' columns for better usage of L1 cpu cache (especially faster for large values of H) + constexpr int numCols = 8; + JaggedArray temp(numCols, H, true); + JaggedArray avg(numCols, H, true); + JaggedArray dev(numCols, H, true); + + int k = col_from; +#ifdef __SSE2__ + const vfloat ninev = F2V(9.f); + const vfloat epsv = F2V(0.001f); +#endif + for (; k < col_to - 7; k += numCols) { + for (int i = 5; i < H - 5; i++) { + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i - 1][k + h])); + } + } + + for (int j = 4; j < H - 4; j++) { +#ifdef __SSE2__ + // faster than #pragma omp simd... + const vfloat avgL1 = ((LVFU(temp[j - 4][0]) + LVFU(temp[j - 3][0])) + (LVFU(temp[j - 2][0]) + LVFU(temp[j - 1][0])) + (LVFU(temp[j][0]) + LVFU(temp[j + 1][0])) + (LVFU(temp[j + 2][0]) + LVFU(temp[j + 3][0])) + LVFU(temp[j + 4][0])) / ninev; + STVFU(avg[j][0], avgL1); + STVFU(dev[j][0], vmaxf(epsv, (SQRV(LVFU(temp[j - 4][0]) - avgL1) + SQRV(LVFU(temp[j - 3][0]) - avgL1)) + (SQRV(LVFU(temp[j - 2][0]) - avgL1) + SQRV(LVFU(temp[j - 1][0]) - avgL1)) + (SQRV(LVFU(temp[j][0]) - avgL1) + SQRV(LVFU(temp[j + 1][0]) - avgL1)) + (SQRV(LVFU(temp[j + 2][0]) - avgL1) + SQRV(LVFU(temp[j + 3][0]) - avgL1)) + SQRV(LVFU(temp[j + 4][0]) - avgL1))); + const vfloat avgL2 = ((LVFU(temp[j - 4][4]) + LVFU(temp[j - 3][4])) + (LVFU(temp[j - 2][4]) + LVFU(temp[j - 1][4])) + (LVFU(temp[j][4]) + LVFU(temp[j + 1][4])) + (LVFU(temp[j + 2][4]) + LVFU(temp[j + 3][4])) + LVFU(temp[j + 4][4])) / ninev; + STVFU(avg[j][4], avgL2); + STVFU(dev[j][4], vmaxf(epsv, (SQRV(LVFU(temp[j - 4][4]) - avgL2) + SQRV(LVFU(temp[j - 3][4]) - avgL2)) + (SQRV(LVFU(temp[j - 2][4]) - avgL2) + SQRV(LVFU(temp[j - 1][4]) - avgL2)) + (SQRV(LVFU(temp[j][4]) - avgL2) + SQRV(LVFU(temp[j + 1][4]) - avgL2)) + (SQRV(LVFU(temp[j + 2][4]) - avgL2) + SQRV(LVFU(temp[j + 3][4]) - avgL2)) + SQRV(LVFU(temp[j + 4][4]) - avgL2))); +#else + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + const float avgL = ((temp[j - 4][h] + temp[j - 3][h]) + (temp[j - 2][h] + temp[j - 1][h]) + (temp[j][h] + temp[j + 1][h]) + (temp[j + 2][h] + temp[j + 3][h]) + temp[j + 4][h]) / 9.f; + avg[j][h] = avgL; + dev[j][h] = std::max(0.001f, (SQR(temp[j - 4][h] - avgL) + SQR(temp[j - 3][h] - avgL)) + (SQR(temp[j - 2][h] - avgL) + SQR(temp[j - 1][h] - avgL)) + (SQR(temp[j][h] - avgL) + SQR(temp[j + 1][h] - avgL)) + (SQR(temp[j + 2][h] - avgL) + SQR(temp[j + 3][h] - avgL)) + SQR(temp[j + 4][h] - avgL)); + } +#endif + } + + for (int j = 5; j < H - 5; j++) { + #pragma omp simd + for(int h = 0; h < numCols; ++h) { + const float avgL = avg[j - 1][h]; + const float avgR = avg[j + 1][h]; + const float devL = dev[j - 1][h]; + const float devR = dev[j + 1][h]; + hpmap[j][k + h] = avgL + (avgR - avgL) * devL / (devL + devR); + } + } + } + for (; k < col_to; k++) { + for (int i = 5; i < H - 5; i++) { + temp[i][0] = std::fabs((rawData[i - 5][k] - rawData[i + 5][k]) - 8 * (rawData[i - 4][k] - rawData[i + 4][k]) + 27 * (rawData[i - 3][k] - rawData[i + 3][k]) - 48 * (rawData[i - 2][k] - rawData[i + 2][k]) + 42 * (rawData[i - 1][k] -rawData[i + 1][k])); + } + + for (int j = 4; j < H - 4; j++) { + const float avgL = (temp[j - 4][0] + temp[j - 3][0] + temp[j - 2][0] + temp[j - 1][0] + temp[j][0] + temp[j + 1][0] + temp[j + 2][0] + temp[j + 3][0] + temp[j + 4][0]) / 9.f; + avg[j][0] = avgL; + dev[j][0] = std::max(0.001f, SQR(temp[j - 4][0] - avgL) + SQR(temp[j - 3][0] - avgL) + SQR(temp[j - 2][0] - avgL) + SQR(temp[j - 1][0] - avgL) + SQR(temp[j][0] - avgL) + SQR(temp[j + 1][0] - avgL) + SQR(temp[j + 2][0] - avgL) + SQR(temp[j + 3][0] - avgL) + SQR(temp[j + 4][0] - avgL)); + } + + for (int j = 5; j < H - 5; j++) { + const float avgL = avg[j - 1][0]; + const float avgR = avg[j + 1][0]; + const float devL = dev[j - 1][0]; + const float devR = dev[j + 1][0]; + hpmap[j][k] = avgL + (avgR - avgL) * devL / (devL + devR); + } + } +} + +void hphd_horizontal(const array2D &rawData, float** hpmap, int row_from, int row_to, int W) +{ + + float* temp = new float[W]; + float* avg = new float[W]; + float* dev = new float[W]; + + memset(temp, 0, W * sizeof(float)); + memset(avg, 0, W * sizeof(float)); + memset(dev, 0, W * sizeof(float)); + +#ifdef __SSE2__ + const vfloat onev = F2V(1.f); + const vfloat twov = F2V(2.f); + const vfloat zd8v = F2V(0.8f); +#endif + for (int i = row_from; i < row_to; i++) { + #pragma omp simd + for (int j = 5; j < W - 5; j++) { + temp[j] = std::fabs((rawData[i][j - 5] - rawData[i][j + 5]) - 8 * (rawData[i][j - 4] - rawData[i][j + 4]) + 27 * (rawData[i][j - 3] - rawData[i][j + 3]) - 48 * (rawData[i][j - 2] - rawData[i][j + 2]) + 42 * (rawData[i][j - 1] - rawData[i][j + 1])); + } + + #pragma omp simd + for (int j = 4; j < W - 4; j++) { + const float avgL = ((temp[j - 4] + temp[j - 3]) + (temp[j - 2] + temp[j - 1]) + (temp[j] + temp[j + 1]) + (temp[j + 2] + temp[j + 3]) + temp[j + 4]) / 9.f; + avg[j] = avgL; + dev[j] = std::max(0.001f, (SQR(temp[j - 4] - avgL) + SQR(temp[j - 3] - avgL)) + (SQR(temp[j - 2] - avgL) + SQR(temp[j - 1] - avgL)) + (SQR(temp[j] - avgL) + SQR(temp[j + 1] - avgL)) + (SQR(temp[j + 2] - avgL) + SQR(temp[j + 3] - avgL)) + SQR(temp[j + 4] - avgL)); + } + + int j = 5; +#ifdef __SSE2__ + // faster than #pragma omp simd + for (; j < W - 8; j+=4) { + const vfloat avgL = LVFU(avg[j - 1]); + const vfloat avgR = LVFU(avg[j + 1]); + const vfloat devL = LVFU(dev[j - 1]); + const vfloat devR = LVFU(dev[j + 1]); + const vfloat hpv = avgL + (avgR - avgL) * devL / (devL + devR); + + const vfloat hpmapoldv = LVFU(hpmap[i][j]); + const vfloat hpmapv = vselfzero(vmaskf_lt(hpmapoldv, zd8v * hpv), twov); + STVFU(hpmap[i][j], vself(vmaskf_lt(hpv, zd8v * hpmapoldv), onev, hpmapv)); + } +#endif + for (; j < W - 5; j++) { + const float avgL = avg[j - 1]; + const float avgR = avg[j + 1]; + const float devL = dev[j - 1]; + const float devR = dev[j + 1]; + const float hpv = avgL + (avgR - avgL) * devL / (devL + devR); + + if (hpmap[i][j] < 0.8f * hpv) { + hpmap[i][j] = 2; + } else if (hpv < 0.8f * hpmap[i][j]) { + hpmap[i][j] = 1; + } else { + hpmap[i][j] = 0; + } + } + } + + delete [] temp; + delete [] avg; + delete [] dev; +} + +void hphd_green(const RawImage *ri, const array2D &rawData, float** hpmap, int W, int H, array2D &green) +{ + + constexpr float eps = 0.001f; +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic, 16) +#endif + + for (int i = 3; i < H - 3; i++) { + for (int j = 3; j < W - 3; j++) { + if (ri->ISGREEN(i, j)) { + green[i][j] = rawData[i][j]; + } else { + if (hpmap[i][j] == 1) { + const float g2 = rawData[i][j + 1] - rawData[i][j + 2] * 0.5f; + const float g4 = rawData[i][j - 1] - rawData[i][j - 2] * 0.5f; + + const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); + float d1 = rawData[i][j + 3] - rawData[i][j + 1]; + float d2 = rawData[i][j + 2] - rawData[i][j]; + float d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; + float d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; + + const float e2 = 1.f / (dx + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + d1 = rawData[i][j - 3] - rawData[i][j - 1]; + d2 = rawData[i][j - 2] - rawData[i][j]; + d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; + + const float e4 = 1.f / (dx + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + green[i][j] = rawData[i][j] * 0.5f + (e2 * g2 + e4 * g4) / (e2 + e4); + } else if (hpmap[i][j] == 2) { + const float g1 = rawData[i - 1][j] - rawData[i - 2][j] * 0.5f; + const float g3 = rawData[i + 1][j] - rawData[i + 2][j] * 0.5f; + + const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); + float d1 = rawData[i - 1][j] - rawData[i - 3][j]; + float d2 = rawData[i][j] - rawData[i - 2][j]; + float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; + float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; + + const float e1 = 1.f / (dy + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + d1 = rawData[i + 1][j] - rawData[i + 3][j]; + d2 = rawData[i][j] - rawData[i + 2][j]; + d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; + d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; + + const float e3 = 1.f / (dy + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + green[i][j] = rawData[i][j] * 0.5f + (e1 * g1 + e3 * g3) / (e1 + e3); + } else { + const float g1 = rawData[i - 1][j] - rawData[i - 2][j] * 0.5f; + const float g2 = rawData[i][j + 1] - rawData[i][j + 2] * 0.5f; + const float g3 = rawData[i + 1][j] - rawData[i + 2][j] * 0.5f; + const float g4 = rawData[i][j - 1] - rawData[i][j - 2] * 0.5f; + + const float dx = eps + std::fabs(rawData[i][j + 1] - rawData[i][j - 1]); + const float dy = eps + std::fabs(rawData[i + 1][j] - rawData[i - 1][j]); + + float d1 = rawData[i - 1][j] - rawData[i - 3][j]; + float d2 = rawData[i][j] - rawData[i - 2][j]; + float d3 = rawData[i][j - 1] - rawData[i - 2][j - 1]; + float d4 = rawData[i][j + 1] - rawData[i - 2][j + 1]; + + const float e1 = 1.f / (dy + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + d1 = rawData[i][j + 3] - rawData[i][j + 1]; + d2 = rawData[i][j + 2] - rawData[i][j]; + d3 = rawData[i - 1][j + 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j + 2] - rawData[i + 1][j]; + + const float e2 = 1.f / (dx + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + d1 = rawData[i + 1][j] - rawData[i + 3][j]; + d2 = rawData[i][j] - rawData[i + 2][j]; + d3 = rawData[i][j - 1] - rawData[i + 2][j - 1]; + d4 = rawData[i][j + 1] - rawData[i + 2][j + 1]; + + const float e3 = 1.f / (dy + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + d1 = rawData[i][j - 3] - rawData[i][j - 1]; + d2 = rawData[i][j - 2] - rawData[i][j]; + d3 = rawData[i - 1][j - 2] - rawData[i - 1][j]; + d4 = rawData[i + 1][j - 2] - rawData[i + 1][j]; + + const float e4 = 1.f / (dx + (std::fabs(d1) + std::fabs(d2)) + (std::fabs(d3) + std::fabs(d4)) * 0.5f); + + green[i][j] = rawData[i][j] * 0.5f + ((e1 * g1 + e2 * g2) + (e3 * g3 + e4 * g4)) / (e1 + e2 + e3 + e4); + } + } + } + } +} + +} + +namespace rtengine +{ + +void RawImageSource::hphd_demosaic () +{ + BENCHFUN + if (plistener) { + plistener->setProgressStr(Glib::ustring::compose(M("TP_RAW_DMETHOD_PROGRESSBAR"), RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::HPHD))); + plistener->setProgress(0.0); + } + + JaggedArray hpmap(W, H, true); + +#ifdef _OPENMP + #pragma omp parallel + { + int tid = omp_get_thread_num(); + int nthreads = omp_get_num_threads(); + int blk = W / nthreads; + + if (tid < nthreads - 1) { + hphd_vertical(rawData, hpmap, tid * blk, (tid + 1)*blk, H); + } else { + hphd_vertical(rawData, hpmap, tid * blk, W, H); + } + } +#else + hphd_vertical(hpmap, 0, W, H); +#endif + + if (plistener) { + plistener->setProgress(0.35); + } + +#ifdef _OPENMP + #pragma omp parallel + { + int tid = omp_get_thread_num(); + int nthreads = omp_get_num_threads(); + int blk = H / nthreads; + + if (tid < nthreads - 1) { + hphd_horizontal(rawData, hpmap, tid * blk, (tid + 1)*blk, W); + } else { + hphd_horizontal(rawData, hpmap, tid * blk, H, W); + } + } +#else + hphd_horizontal(hpmap, 0, H); +#endif + + if (plistener) { + plistener->setProgress(0.43); + } + + hphd_green(ri, rawData, hpmap, W, H, green); + + if (plistener) { + plistener->setProgress(0.65); + } + + #pragma omp parallel for + for (int i = 4; i < H - 4; i++) { + interpolate_row_rb_mul_pp(rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); + } + + border_interpolate2(W, H, 4, rawData, red, green, blue); + + if (plistener) { + plistener->setProgress(1.0); + } +} + + + +} /* namespace */ diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index 470fd138b..a23e9c3cb 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -93,9 +93,6 @@ protected: std::vector histMatchingCache; ColorManagementParams histMatchingParams; - void hphd_vertical (float** hpmap, int col_from, int col_to); - void hphd_horizontal (float** hpmap, int row_from, int row_to); - void hphd_green (float** hpmap); void processFalseColorCorrectionThread (Imagefloat* im, array2D &rbconv_Y, array2D &rbconv_I, array2D &rbconv_Q, array2D &rbout_I, array2D &rbout_Q, const int row_from, const int row_to); void hlRecovery (const std::string &method, float* red, float* green, float* blue, int width, float* hlmax); void transformRect (const PreviewProps &pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw); From 7c3707a39233cc3162aa198be8e3d756f798282f Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 31 Jan 2019 22:27:34 +0100 Subject: [PATCH 3/5] hphd dempsaic: disable timing code, #5159 --- rtengine/hphd_demosaic_RT.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/hphd_demosaic_RT.cc b/rtengine/hphd_demosaic_RT.cc index d5a21a4d0..964b73e80 100644 --- a/rtengine/hphd_demosaic_RT.cc +++ b/rtengine/hphd_demosaic_RT.cc @@ -25,7 +25,7 @@ #include "rt_math.h" #include "../rtgui/multilangmgr.h" #include "opthelper.h" -#define BENCHMARK +//#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP #include From a5427f32a23ea9d2daccabfaf906e12a896c8dd6 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 31 Jan 2019 23:31:06 +0100 Subject: [PATCH 4/5] hphd demosaic: fix wrong index, #5159 --- rtengine/hphd_demosaic_RT.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtengine/hphd_demosaic_RT.cc b/rtengine/hphd_demosaic_RT.cc index 964b73e80..7afb7daa6 100644 --- a/rtengine/hphd_demosaic_RT.cc +++ b/rtengine/hphd_demosaic_RT.cc @@ -25,7 +25,7 @@ #include "rt_math.h" #include "../rtgui/multilangmgr.h" #include "opthelper.h" -//#define BENCHMARK +#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP #include @@ -54,7 +54,7 @@ void hphd_vertical(const array2D &rawData, float** hpmap, int col_from, i for (int i = 5; i < H - 5; i++) { #pragma omp simd for(int h = 0; h < numCols; ++h) { - temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i - 1][k + h])); + temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i + 1][k + h])); } } From 8207d174a0cc6c815b6e23ef29af5f34d6fd9dc0 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 31 Jan 2019 23:37:33 +0100 Subject: [PATCH 5/5] hphd demosaic: disable timing code, #5159 --- rtengine/hphd_demosaic_RT.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/hphd_demosaic_RT.cc b/rtengine/hphd_demosaic_RT.cc index 7afb7daa6..fd2e5cce1 100644 --- a/rtengine/hphd_demosaic_RT.cc +++ b/rtengine/hphd_demosaic_RT.cc @@ -25,7 +25,7 @@ #include "rt_math.h" #include "../rtgui/multilangmgr.h" #include "opthelper.h" -#define BENCHMARK +//#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP #include