From 72ee9918587bb69efc3de935768d21ab61d5a498 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Fri, 7 Dec 2018 00:39:41 +0100 Subject: [PATCH 1/5] Improve detection of flat regions for calculation of dual demosaic contrast threshold --- rtengine/rt_algo.cc | 159 +++++++++++++++++++++++++++++++------------- 1 file changed, 113 insertions(+), 46 deletions(-) diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index af2083d8e..bd45819d9 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -51,6 +51,49 @@ vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { return onev / (onev + xexpf(c16v - c16v * valv / thresholdv)); } #endif + +float tileAverage(float **data, size_t tileY, size_t tileX, size_t tilesize) { + +#ifdef __SSE2__ + vfloat avgv = ZEROV; + for (size_t y = tileY; y < tileY + tilesize; ++y) { + for (size_t x = tileX; x < tileX + tilesize; x += 4) { + avgv += LVFU(data[y][x]); + } + } + const float avg = vhadd(avgv); +#else + float avg = 0.f; + for (size_t y = tileY; y < tileY + tilesize; ++y) { + for (size_t x = tileX; x < tileX + tilesize; ++x) { + avg += data[y][x]; + } + } +#endif + return avg / rtengine::SQR(tilesize); +} + +float tileVariance(float **data, size_t tileY, size_t tileX, size_t tilesize, float avg) { + +#ifdef __SSE2__ + vfloat varv = ZEROV; + const vfloat avgv = F2V(avg); + for (size_t y = tileY; y < tileY + tilesize; ++y) { + for (size_t x = tileX; x < tileX + tilesize; x +=4) { + varv += SQRV(LVFU(data[y][x]) - avgv); + } + } + const float var = vhadd(varv); +#else + float var = 0.f; + for (size_t y = tileY; y < tileY + tilesize; ++y) { + for (size_t x = tileX;; x < tileX + tilesize; ++x) { + var += rtengine::SQR(data[y][x] - avg); + } + } +#endif + return var / (rtengine::SQR(tilesize) * avg); +} } namespace rtengine @@ -202,6 +245,8 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } else { if (autoContrast) { + constexpr float minLuminance = 2000.f; + constexpr float maxLuminance = 20000.f; for (int pass = 0; pass < 2; ++pass) { const int tilesize = 80 / (pass + 1); const int skip = pass == 0 ? tilesize : tilesize / 4; @@ -216,47 +261,14 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr const int tileY = i * skip; for (int j = 0; j < numTilesW; ++j) { const int tileX = j * skip; -#ifdef __SSE2__ - vfloat avgv = ZEROV; - for (int y = tileY; y < tileY + tilesize; ++y) { - for (int x = tileX; x < tileX + tilesize; x += 4) { - avgv += LVFU(luminance[y][x]); - } - } - float avg = vhadd(avgv); -#else - float avg = 0.f; - for (int y = tileY; y < tileY + tilesize; ++y) { - for (int x = tileX; x < tileX + tilesize; ++x) { - avg += luminance[y][x]; - } - } -#endif - avg /= SQR(tilesize); - if (avg < 2000.f || avg > 20000.f) { + const float avg = tileAverage(luminance, tileY, tileX, tilesize); + if (avg < minLuminance || avg > maxLuminance) { // too dark or too bright => skip the tile variances[i][j] = RT_INFINITY_F; continue; + } else { + variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); } -#ifdef __SSE2__ - vfloat varv = ZEROV; - avgv = F2V(avg); - for (int y = tileY; y < tileY + tilesize; ++y) { - for (int x = tileX; x < tileX + tilesize; x +=4) { - varv += SQRV(LVFU(luminance[y][x]) - avgv); - } - } - float var = vhadd(varv); -#else - float var = 0.f; - for (int y = tileY; y < tileY + tilesize; ++y) { - for (int x = tileX; x < tileX + tilesize; ++x) { - var += SQR(luminance[y][x] - avg); - } - } -#endif - var /= (SQR(tilesize) * avg); - variances[i][j] = var; } } @@ -276,17 +288,72 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr const int minX = skip * minJ; if (minvar <= 1.f || pass == 1) { - // a variance <= 1 means we already found a flat region and can skip second pass - // in second pass we allow a variance of 2 - JaggedArray Lum(tilesize, tilesize); - JaggedArray Blend(tilesize, tilesize); - for (int i = 0; i < tilesize; ++i) { - for (int j = 0; j < tilesize; ++j) { - Lum[i][j] = luminance[i + minY][j + minX]; + if (pass == 0) { + // a variance <= 1 means we already found a flat region and can skip second pass + JaggedArray Lum(tilesize, tilesize); + JaggedArray Blend(tilesize, tilesize); + for (int i = 0; i < tilesize; ++i) { + for (int j = 0; j < tilesize; ++j) { + Lum[i][j] = luminance[i + minY][j + minX]; + } + } + contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f; + break; + } else { + // in second pass we allow a variance of 4 + // we additionally scan the tiles +-skip pixels around the best tile from pass 2 + // Means we scan (2 * skip + 1)^2 tiles in this step to get a better hit rate + // fortunately the scan is quite fast, so we use only one core and don't parallelize + const int topLeftYStart = std::max(minY - skip, 0); + const int topLeftXStart = std::max(minX - skip, 0); + const int topLeftYEnd = std::min(minY + skip, H - tilesize); + const int topLeftXEnd = std::min(minX + skip, W - tilesize); + const int numTilesH = topLeftYEnd - topLeftYStart + 1; + const int numTilesW = topLeftXEnd - topLeftXStart + 1; + + std::vector> variances(numTilesH, std::vector(numTilesW)); + for (int i = 0; i < numTilesH; ++i) { + const int tileY = topLeftYStart + i; + for (int j = 0; j < numTilesW; ++j) { + const int tileX = topLeftXStart + j; + const float avg = tileAverage(luminance, tileY, tileX, tilesize); + + if (avg < minLuminance || avg > maxLuminance) { + // too dark or too bright => skip the tile + variances[i][j] = RT_INFINITY_F; + continue; + } else { + variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); + } + } + } + + float minvar = RT_INFINITY_F; + int minI = 0, minJ = 0; + for (int i = 0; i < numTilesH; ++i) { + for (int j = 0; j < numTilesW; ++j) { + if (variances[i][j] < minvar) { + minvar = variances[i][j]; + minI = i; + minJ = j; + } + } + } + if (minvar <= 4.f) { + JaggedArray Lum(tilesize, tilesize); + JaggedArray Blend(tilesize, tilesize); + const int minY = topLeftYStart + minI; + const int minX = topLeftXStart + minJ; + for (int i = 0; i < tilesize; ++i) { + for (int j = 0; j < tilesize; ++j) { + Lum[i][j] = luminance[i + minY][j + minX]; + } + } + contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f; + } else { + contrastThreshold = 0.f; } } - contrastThreshold = (pass == 0 || minvar <= 4.f) ? calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f : 0.f; - break; } } } From d9d8005706cb400cb7afdde3f89eb291ead130e7 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Fri, 7 Dec 2018 16:22:24 +0100 Subject: [PATCH 2/5] Some changes as suggested by @Floessie, #5070 --- rtengine/rt_algo.cc | 472 +++++++++++++++++++++----------------------- rtengine/rt_algo.h | 1 - 2 files changed, 225 insertions(+), 248 deletions(-) diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index bd45819d9..a2ca3cac4 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #ifdef _OPENMP @@ -31,7 +32,6 @@ #include "rt_algo.h" #include "rt_math.h" #include "sleef.c" -#include "jaggedarray.h" namespace { float calcBlendFactor(float val, float threshold) { @@ -54,46 +54,110 @@ vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { float tileAverage(float **data, size_t tileY, size_t tileX, size_t tilesize) { + float avg = 0.f; #ifdef __SSE2__ vfloat avgv = ZEROV; - for (size_t y = tileY; y < tileY + tilesize; ++y) { - for (size_t x = tileX; x < tileX + tilesize; x += 4) { +#endif + for (std::size_t y = tileY; y < tileY + tilesize; ++y) { + std::size_t x = tileX; +#ifdef __SSE2__ + for (; x < tileX + tilesize - 3; x += 4) { avgv += LVFU(data[y][x]); } - } - const float avg = vhadd(avgv); -#else - float avg = 0.f; - for (size_t y = tileY; y < tileY + tilesize; ++y) { - for (size_t x = tileX; x < tileX + tilesize; ++x) { +#endif + for (; x < tileX + tilesize; ++x) { avg += data[y][x]; } } +#ifdef __SSE2__ + avg += vhadd(avgv); #endif return avg / rtengine::SQR(tilesize); } float tileVariance(float **data, size_t tileY, size_t tileX, size_t tilesize, float avg) { + float var = 0.f; #ifdef __SSE2__ vfloat varv = ZEROV; const vfloat avgv = F2V(avg); - for (size_t y = tileY; y < tileY + tilesize; ++y) { - for (size_t x = tileX; x < tileX + tilesize; x +=4) { +#endif + for (std::size_t y = tileY; y < tileY + tilesize; ++y) { + std::size_t x = tileX; +#ifdef __SSE2__ + for (; x < tileX + tilesize - 3; x += 4) { varv += SQRV(LVFU(data[y][x]) - avgv); } - } - const float var = vhadd(varv); -#else - float var = 0.f; - for (size_t y = tileY; y < tileY + tilesize; ++y) { - for (size_t x = tileX;; x < tileX + tilesize; ++x) { +#endif + for (; x < tileX + tilesize; ++x) { var += rtengine::SQR(data[y][x] - avg); } } +#ifdef __SSE2__ + var += vhadd(varv); #endif return var / (rtengine::SQR(tilesize) * avg); } + +float calcContrastThreshold(float** luminance, int tileY, int tileX, int tilesize) { + + constexpr float scale = 0.0625f / 327.68f; + std::vector> blend(tilesize - 4, std::vector(tilesize - 4)); + +#ifdef __SSE2__ + const vfloat scalev = F2V(scale); +#endif + + for(int j = tileY + 2; j < tileY + tilesize - 2; ++j) { + int i = tileX + 2; +#ifdef __SSE2__ + for(; i < tileX + tilesize - 5; i += 4) { + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + STVFU(blend[j - tileY - 2][i - tileX - 2], contrastv); + } +#endif + for(; i < tileX + tilesize - 2; ++i) { + + float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + + rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; + + blend[j - tileY - 2][i - tileX - 2] = contrast; + } + } + + const float limit = rtengine::SQR(tilesize - 4) / 100.f; + + int c; + for (c = 1; c < 100; ++c) { + const float contrastThreshold = c / 100.f; + float sum = 0.f; +#ifdef __SSE2__ + const vfloat contrastThresholdv = F2V(contrastThreshold); + vfloat sumv = ZEROV; +#endif + + for(int j = 0; j < tilesize - 4; ++j) { + int i = 0; +#ifdef __SSE2__ + for(; i < tilesize - 7; i += 4) { + sumv += calcBlendFactor(LVFU(blend[j][i]), contrastThresholdv); + } +#endif + for(; i < tilesize - 4; ++i) { + sum += calcBlendFactor(blend[j][i], contrastThreshold); + } + } +#ifdef __SSE2__ + sum += vhadd(sumv); +#endif + if (sum <= limit) { + break; + } + } + + return c / 100.f; +} } namespace rtengine @@ -237,254 +301,168 @@ void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& void buildBlendMask(float** luminance, float **blend, int W, int H, float &contrastThreshold, float amount, bool autoContrast) { - if(contrastThreshold == 0.f && !autoContrast) { + if (autoContrast) { + constexpr float minLuminance = 2000.f; + constexpr float maxLuminance = 20000.f; + for (int pass = 0; pass < 2; ++pass) { + const int tilesize = 80 / (pass + 1); + const int skip = pass == 0 ? tilesize : tilesize / 4; + const int numTilesW = W / skip - 3 * pass; + const int numTilesH = H / skip - 3 * pass; + std::vector> variances(numTilesH, std::vector(numTilesW)); + +#ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) +#endif + for (int i = 0; i < numTilesH; ++i) { + const int tileY = i * skip; + for (int j = 0; j < numTilesW; ++j) { + const int tileX = j * skip; + const float avg = tileAverage(luminance, tileY, tileX, tilesize); + if (avg < minLuminance || avg > maxLuminance) { + // too dark or too bright => skip the tile + variances[i][j] = RT_INFINITY_F; + continue; + } else { + variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); + } + } + } + + float minvar = RT_INFINITY_F; + int minI = 0, minJ = 0; + for (int i = 0; i < numTilesH; ++i) { + for (int j = 0; j < numTilesW; ++j) { + if (variances[i][j] < minvar) { + minvar = variances[i][j]; + minI = i; + minJ = j; + } + } + } + + const int minY = skip * minI; + const int minX = skip * minJ; + + if (minvar <= 1.f || pass == 1) { + if (pass == 0) { + // a variance <= 1 means we already found a flat region and can skip second pass + contrastThreshold = calcContrastThreshold(luminance, minY, minX, tilesize); + break; + } else { + // in second pass we allow a variance of 4 + // we additionally scan the tiles +-skip pixels around the best tile from pass 2 + // Means we scan (2 * skip + 1)^2 tiles in this step to get a better hit rate + // fortunately the scan is quite fast, so we use only one core and don't parallelize + const int topLeftYStart = std::max(minY - skip, 0); + const int topLeftXStart = std::max(minX - skip, 0); + const int topLeftYEnd = std::min(minY + skip, H - tilesize); + const int topLeftXEnd = std::min(minX + skip, W - tilesize); + const int numTilesH = topLeftYEnd - topLeftYStart + 1; + const int numTilesW = topLeftXEnd - topLeftXStart + 1; + + std::vector> variances(numTilesH, std::vector(numTilesW)); + for (int i = 0; i < numTilesH; ++i) { + const int tileY = topLeftYStart + i; + for (int j = 0; j < numTilesW; ++j) { + const int tileX = topLeftXStart + j; + const float avg = tileAverage(luminance, tileY, tileX, tilesize); + + if (avg < minLuminance || avg > maxLuminance) { + // too dark or too bright => skip the tile + variances[i][j] = RT_INFINITY_F; + continue; + } else { + variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); + } + } + } + + float minvar = RT_INFINITY_F; + int minI = 0, minJ = 0; + for (int i = 0; i < numTilesH; ++i) { + for (int j = 0; j < numTilesW; ++j) { + if (variances[i][j] < minvar) { + minvar = variances[i][j]; + minI = i; + minJ = j; + } + } + } + contrastThreshold = minvar <= 4.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f; + } + } + } + } + + if(contrastThreshold == 0.f) { for(int j = 0; j < H; ++j) { for(int i = 0; i < W; ++i) { blend[j][i] = amount; } } } else { - if (autoContrast) { - constexpr float minLuminance = 2000.f; - constexpr float maxLuminance = 20000.f; - for (int pass = 0; pass < 2; ++pass) { - const int tilesize = 80 / (pass + 1); - const int skip = pass == 0 ? tilesize : tilesize / 4; - const int numTilesW = W / skip - 3 * pass; - const int numTilesH = H / skip - 3 * pass; - std::vector> variances(numTilesH, std::vector(numTilesW)); - + constexpr float scale = 0.0625f / 327.68f; #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) + #pragma omp parallel +#endif + { +#ifdef __SSE2__ + const vfloat contrastThresholdv = F2V(contrastThreshold); + const vfloat scalev = F2V(scale); + const vfloat amountv = F2V(amount); #endif - for (int i = 0; i < numTilesH; ++i) { - const int tileY = i * skip; - for (int j = 0; j < numTilesW; ++j) { - const int tileX = j * skip; - const float avg = tileAverage(luminance, tileY, tileX, tilesize); - if (avg < minLuminance || avg > maxLuminance) { - // too dark or too bright => skip the tile - variances[i][j] = RT_INFINITY_F; - continue; - } else { - variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); - } - } - } - - float minvar = RT_INFINITY_F; - int minI = 0, minJ = 0; - for (int i = 0; i < numTilesH; ++i) { - for (int j = 0; j < numTilesW; ++j) { - if (variances[i][j] < minvar) { - minvar = variances[i][j]; - minI = i; - minJ = j; - } - } - } - - const int minY = skip * minI; - const int minX = skip * minJ; - - if (minvar <= 1.f || pass == 1) { - if (pass == 0) { - // a variance <= 1 means we already found a flat region and can skip second pass - JaggedArray Lum(tilesize, tilesize); - JaggedArray Blend(tilesize, tilesize); - for (int i = 0; i < tilesize; ++i) { - for (int j = 0; j < tilesize; ++j) { - Lum[i][j] = luminance[i + minY][j + minX]; - } - } - contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f; - break; - } else { - // in second pass we allow a variance of 4 - // we additionally scan the tiles +-skip pixels around the best tile from pass 2 - // Means we scan (2 * skip + 1)^2 tiles in this step to get a better hit rate - // fortunately the scan is quite fast, so we use only one core and don't parallelize - const int topLeftYStart = std::max(minY - skip, 0); - const int topLeftXStart = std::max(minX - skip, 0); - const int topLeftYEnd = std::min(minY + skip, H - tilesize); - const int topLeftXEnd = std::min(minX + skip, W - tilesize); - const int numTilesH = topLeftYEnd - topLeftYStart + 1; - const int numTilesW = topLeftXEnd - topLeftXStart + 1; - - std::vector> variances(numTilesH, std::vector(numTilesW)); - for (int i = 0; i < numTilesH; ++i) { - const int tileY = topLeftYStart + i; - for (int j = 0; j < numTilesW; ++j) { - const int tileX = topLeftXStart + j; - const float avg = tileAverage(luminance, tileY, tileX, tilesize); - - if (avg < minLuminance || avg > maxLuminance) { - // too dark or too bright => skip the tile - variances[i][j] = RT_INFINITY_F; - continue; - } else { - variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); - } - } - } - - float minvar = RT_INFINITY_F; - int minI = 0, minJ = 0; - for (int i = 0; i < numTilesH; ++i) { - for (int j = 0; j < numTilesW; ++j) { - if (variances[i][j] < minvar) { - minvar = variances[i][j]; - minI = i; - minJ = j; - } - } - } - if (minvar <= 4.f) { - JaggedArray Lum(tilesize, tilesize); - JaggedArray Blend(tilesize, tilesize); - const int minY = topLeftYStart + minI; - const int minX = topLeftXStart + minJ; - for (int i = 0; i < tilesize; ++i) { - for (int j = 0; j < tilesize; ++j) { - Lum[i][j] = luminance[i + minY][j + minX]; - } - } - contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f; - } else { - contrastThreshold = 0.f; - } - } - } - } - } - - if(contrastThreshold == 0.f) { - for(int j = 0; j < H; ++j) { - for(int i = 0; i < W; ++i) { - blend[j][i] = amount; - } - } - } else { - constexpr float scale = 0.0625f / 327.68f; #ifdef _OPENMP - #pragma omp parallel + #pragma omp for schedule(dynamic,16) +#endif + + for(int j = 2; j < H - 2; ++j) { + int i = 2; +#ifdef __SSE2__ + for(; i < W - 5; i += 4) { + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + + STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); + } +#endif + for(; i < W - 2; ++i) { + + float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + + rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; + + blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); + } + } + +#ifdef _OPENMP + #pragma omp single #endif { -#ifdef __SSE2__ - const vfloat contrastThresholdv = F2V(contrastThreshold); - const vfloat scalev = F2V(scale); - const vfloat amountv = F2V(amount); -#endif -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif - - for(int j = 2; j < H - 2; ++j) { - int i = 2; -#ifdef __SSE2__ - for(; i < W - 5; i += 4) { - vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + - SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; - - STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); - } -#endif - for(; i < W - 2; ++i) { - - float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + - rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; - - blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); + // upper border + for(int j = 0; j < 2; ++j) { + for(int i = 2; i < W - 2; ++i) { + blend[j][i] = blend[2][i]; } } - -#ifdef _OPENMP - #pragma omp single -#endif - { - // upper border - for(int j = 0; j < 2; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[2][i]; - } - } - // lower border - for(int j = H - 2; j < H; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[H-3][i]; - } - } - for(int j = 0; j < H; ++j) { - // left border - blend[j][0] = blend[j][1] = blend[j][2]; - // right border - blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3]; + // lower border + for(int j = H - 2; j < H; ++j) { + for(int i = 2; i < W - 2; ++i) { + blend[j][i] = blend[H-3][i]; } } - - // blur blend mask to smooth transitions - gaussianBlur(blend, blend, W, H, 2.0); + for(int j = 0; j < H; ++j) { + // left border + blend[j][0] = blend[j][1] = blend[j][2]; + // right border + blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3]; + } } + + // blur blend mask to smooth transitions + gaussianBlur(blend, blend, W, H, 2.0); } } } -int calcContrastThreshold(float** luminance, float **blend, int W, int H) { - - constexpr float scale = 0.0625f / 327.68f; - -#ifdef __SSE2__ - const vfloat scalev = F2V(scale); -#endif - - for(int j = 2; j < H - 2; ++j) { - int i = 2; -#ifdef __SSE2__ - for(; i < W - 5; i += 4) { - vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + - SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; - STVFU(blend[j -2 ][i - 2], contrastv); - } -#endif - for(; i < W - 2; ++i) { - - float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + - rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; - - blend[j -2][i- 2] = contrast; - } - } - - const float limit = (W - 4) * (H - 4) / 100.f; - - int c; - for (c = 1; c < 100; ++c) { - const float contrastThreshold = c / 100.f; - float sum = 0.f; -#ifdef __SSE2__ - const vfloat contrastThresholdv = F2V(contrastThreshold); - vfloat sumv = ZEROV; -#endif - - for(int j = 0; j < H - 4; ++j) { - int i = 0; -#ifdef __SSE2__ - for(; i < W - 7; i += 4) { - sumv += calcBlendFactor(LVFU(blend[j][i]), contrastThresholdv); - } -#endif - for(; i < W - 4; ++i) { - sum += calcBlendFactor(blend[j][i], contrastThreshold); - } - } -#ifdef __SSE2__ - sum += vhadd(sumv); -#endif - if (sum <= limit) { - break; - } - } - - return c; -} } diff --git a/rtengine/rt_algo.h b/rtengine/rt_algo.h index 0207e6f57..a8e2e3e23 100644 --- a/rtengine/rt_algo.h +++ b/rtengine/rt_algo.h @@ -25,5 +25,4 @@ namespace rtengine { void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multiThread = true); void buildBlendMask(float** luminance, float **blend, int W, int H, float &contrastThreshold, float amount = 1.f, bool autoContrast = false); -int calcContrastThreshold(float** luminance, float **blend, int W, int H); } From 3f606c776a06a42f6851bec4142d4a7da6e9d09d Mon Sep 17 00:00:00 2001 From: heckflosse Date: Fri, 7 Dec 2018 22:07:56 +0100 Subject: [PATCH 3/5] exclude tiles with too low variance from calculation of dual demosaic contrast threshold --- rtengine/rt_algo.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index a2ca3cac4..a304b654f 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -304,6 +304,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr if (autoContrast) { constexpr float minLuminance = 2000.f; constexpr float maxLuminance = 20000.f; + constexpr float minTileVariance = 0.5f; for (int pass = 0; pass < 2; ++pass) { const int tilesize = 80 / (pass + 1); const int skip = pass == 0 ? tilesize : tilesize / 4; @@ -325,6 +326,8 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr continue; } else { variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); + // exclude tiles with a variance less than minTileVariance + variances[i][j] = variances[i][j] < minTileVariance ? RT_INFINITY_F : variances[i][j]; } } } @@ -341,10 +344,9 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } - const int minY = skip * minI; - const int minX = skip * minJ; - if (minvar <= 1.f || pass == 1) { + const int minY = skip * minI; + const int minX = skip * minJ; if (pass == 0) { // a variance <= 1 means we already found a flat region and can skip second pass contrastThreshold = calcContrastThreshold(luminance, minY, minX, tilesize); @@ -374,6 +376,8 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr continue; } else { variances[i][j] = tileVariance(luminance, tileY, tileX, tilesize, avg); + // exclude tiles with a variance less than minTileVariance + variances[i][j] = variances[i][j] < minTileVariance ? RT_INFINITY_F : variances[i][j]; } } } From 1d010029538833bd21af3b3d9ae1d19211ec9474 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sat, 8 Dec 2018 17:42:31 +0100 Subject: [PATCH 4/5] Added console output, #5070 --- rtengine/rt_algo.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index a304b654f..60042cba4 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -32,7 +32,7 @@ #include "rt_algo.h" #include "rt_math.h" #include "sleef.c" - +#include namespace { float calcBlendFactor(float val, float threshold) { // sigmoid function @@ -343,7 +343,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } } - + std::cout << pass + 1 << " : " << minvar << std::endl; if (minvar <= 1.f || pass == 1) { const int minY = skip * minI; const int minX = skip * minJ; @@ -393,6 +393,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } } + std::cout << 3 << " : " << minvar << std::endl; contrastThreshold = minvar <= 4.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f; } } From 872279ae97a34a9290d42f080007e7c3c34f0cff Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 9 Dec 2018 22:39:00 +0100 Subject: [PATCH 5/5] Remove some console output before merge --- rtengine/rt_algo.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index 60042cba4..1011ae7b7 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -32,7 +32,7 @@ #include "rt_algo.h" #include "rt_math.h" #include "sleef.c" -#include + namespace { float calcBlendFactor(float val, float threshold) { // sigmoid function @@ -343,7 +343,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } } - std::cout << pass + 1 << " : " << minvar << std::endl; + if (minvar <= 1.f || pass == 1) { const int minY = skip * minI; const int minX = skip * minJ; @@ -393,7 +393,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr } } } - std::cout << 3 << " : " << minvar << std::endl; + contrastThreshold = minvar <= 4.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f; } }