Some changes as suggested by @Floessie, #5070

This commit is contained in:
heckflosse
2018-12-07 16:22:24 +01:00
parent 72ee991858
commit d9d8005706
2 changed files with 225 additions and 248 deletions

View File

@@ -20,6 +20,7 @@
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>
#ifdef _OPENMP
@@ -31,7 +32,6 @@
#include "rt_algo.h"
#include "rt_math.h"
#include "sleef.c"
#include "jaggedarray.h"
namespace {
float calcBlendFactor(float val, float threshold) {
@@ -54,46 +54,110 @@ vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) {
/**
 * Arithmetic mean of the square tile of `data` whose top-left element is
 * data[tileY][tileX] and whose edge length is `tilesize`.
 *
 * @param data     row pointers of the image plane; rows tileY .. tileY + tilesize - 1 and
 *                 columns tileX .. tileX + tilesize - 1 are read
 * @param tileY    first row of the tile
 * @param tileX    first column of the tile
 * @param tilesize edge length of the tile; with 0 the result is 0.f / 0 (not a usable mean)
 * @return sum of all tile elements divided by tilesize * tilesize
 */
float tileAverage(float **data, size_t tileY, size_t tileX, size_t tilesize) {
    float avg = 0.f;
#ifdef __SSE2__
    vfloat avgv = ZEROV;
#endif
    const std::size_t endY = tileY + tilesize;
    const std::size_t endX = tileX + tilesize;

    for (std::size_t y = tileY; y < endY; ++y) {
        std::size_t x = tileX;
#ifdef __SSE2__
        // Vector part: four columns per step. The bound is written as "x + 3 < endX"
        // rather than "x < endX - 3" so it cannot wrap around for unsigned operands
        // when endX is smaller than 3.
        for (; x + 3 < endX; x += 4) {
            avgv += LVFU(data[y][x]);
        }
#endif
        // Scalar part: the whole row without SSE2, otherwise the columns left over
        // when tilesize is not a multiple of 4.
        for (; x < endX; ++x) {
            avg += data[y][x];
        }
    }

#ifdef __SSE2__
    // fold the vector accumulator into the scalar sum
    avg += vhadd(avgv);
#endif
    return avg / rtengine::SQR(tilesize);
}
/**
 * Sum of squared deviations from `avg` over the square tile of `data` whose top-left
 * element is data[tileY][tileX], divided by (tilesize * tilesize * avg).
 *
 * Note the extra division by `avg`: the returned value is the tile variance relative
 * to the supplied average, not the plain variance.
 *
 * @param data     row pointers of the image plane; rows tileY .. tileY + tilesize - 1 and
 *                 columns tileX .. tileX + tilesize - 1 are read
 * @param tileY    first row of the tile
 * @param tileX    first column of the tile
 * @param tilesize edge length of the tile
 * @param avg      value the deviations are measured against (presumably the result of
 *                 tileAverage for the same tile - confirm at the call site); it is also
 *                 used as a divisor, so it must not be 0
 * @return sum((data[y][x] - avg)^2) / (tilesize^2 * avg)
 */
float tileVariance(float **data, size_t tileY, size_t tileX, size_t tilesize, float avg) {
    float var = 0.f;
#ifdef __SSE2__
    vfloat varv = ZEROV;
    const vfloat avgv = F2V(avg);
#endif
    const std::size_t endY = tileY + tilesize;
    const std::size_t endX = tileX + tilesize;

    for (std::size_t y = tileY; y < endY; ++y) {
        std::size_t x = tileX;
#ifdef __SSE2__
        // Vector part: four columns per step. "x + 3 < endX" instead of "x < endX - 3"
        // keeps the unsigned bound from wrapping around when endX is smaller than 3.
        for (; x + 3 < endX; x += 4) {
            varv += SQRV(LVFU(data[y][x]) - avgv);
        }
#endif
        // Scalar part: the whole row without SSE2, otherwise the leftover columns.
        for (; x < endX; ++x) {
            var += rtengine::SQR(data[y][x] - avg);
        }
    }

#ifdef __SSE2__
    // fold the vector accumulator into the scalar sum
    var += vhadd(varv);
#endif
    return var / (rtengine::SQR(tilesize) * avg);
}
/**
 * Determine a contrast threshold from one square tile of `luminance`.
 *
 * Step 1 computes, for every tile pixel that is at least 2 pixels away from the tile
 * border, a scaled gradient magnitude from its horizontal and vertical neighbours at
 * distance 1 and 2, and stores it in a (tilesize - 4) x (tilesize - 4) scratch buffer.
 * Step 2 tries the thresholds 0.01, 0.02, ... 0.99 in ascending order and stops at the
 * first one for which the summed calcBlendFactor() over the scratch buffer is not
 * larger than 1 % of the number of scratch elements.
 *
 * @param luminance row pointers of the luminance plane
 * @param tileY     first row of the tile
 * @param tileX     first column of the tile
 * @param tilesize  edge length of the tile
 * @return the accepted threshold, or 1.f when none of the tried thresholds was accepted
 */
float calcContrastThreshold(float** luminance, int tileY, int tileX, int tilesize) {

    constexpr float scale = 0.0625f / 327.68f;

    // edge length of the tile once the 2 pixel border has been removed
    const int inner = tilesize - 4;
    std::vector<std::vector<float>> blend(inner, std::vector<float>(inner));

#ifdef __SSE2__
    const vfloat scalev = F2V(scale);
#endif

    const int firstCol = tileX + 2;
    const int endCol = tileX + tilesize - 2;

    for (int row = 0; row < inner; ++row) {
        const int j = tileY + 2 + row;
        const float* const up2 = luminance[j - 2];
        const float* const up1 = luminance[j - 1];
        const float* const cur = luminance[j];
        const float* const down1 = luminance[j + 1];
        const float* const down2 = luminance[j + 2];
        std::vector<float>& out = blend[row];

        int i = firstCol;
#ifdef __SSE2__
        // four pixels per step; stops early enough that the widest load (i + 2 .. i + 5)
        // stays inside the tile
        for (; i < endCol - 3; i += 4) {
            const vfloat dx1 = LVFU(cur[i + 1]) - LVFU(cur[i - 1]);
            const vfloat dy1 = LVFU(down1[i]) - LVFU(up1[i]);
            const vfloat dx2 = LVFU(cur[i + 2]) - LVFU(cur[i - 2]);
            const vfloat dy2 = LVFU(down2[i]) - LVFU(up2[i]);
            const vfloat contrastv = vsqrtf(SQRV(dx1) + SQRV(dy1) + SQRV(dx2) + SQRV(dy2)) * scalev;
            STVFU(out[i - firstCol], contrastv);
        }
#endif
        // remaining pixels of the row (all of them without SSE2)
        for (; i < endCol; ++i) {
            const float dx1 = cur[i + 1] - cur[i - 1];
            const float dy1 = down1[i] - up1[i];
            const float dx2 = cur[i + 2] - cur[i - 2];
            const float dy2 = down2[i] - up2[i];
            out[i - firstCol] = sqrtf(rtengine::SQR(dx1) + rtengine::SQR(dy1) + rtengine::SQR(dx2) + rtengine::SQR(dy2)) * scale;
        }
    }

    // 1 % of the number of scratch elements
    const float limit = rtengine::SQR(inner) / 100.f;

    int c = 1;
    while (c < 100) {
        const float contrastThreshold = c / 100.f;
        float sum = 0.f;
#ifdef __SSE2__
        const vfloat contrastThresholdv = F2V(contrastThreshold);
        vfloat sumv = ZEROV;
#endif
        for (const std::vector<float>& line : blend) {
            int i = 0;
#ifdef __SSE2__
            for (; i < inner - 3; i += 4) {
                sumv += calcBlendFactor(LVFU(line[i]), contrastThresholdv);
            }
#endif
            for (; i < inner; ++i) {
                sum += calcBlendFactor(line[i], contrastThreshold);
            }
        }
#ifdef __SSE2__
        sum += vhadd(sumv);
#endif
        if (sum <= limit) {
            break;
        }
        ++c;
    }

    return c / 100.f;
}
}
namespace rtengine
@@ -237,13 +301,6 @@ void findMinMaxPercentile(const float* data, size_t size, float minPrct, float&
void buildBlendMask(float** luminance, float **blend, int W, int H, float &contrastThreshold, float amount, bool autoContrast) {
if(contrastThreshold == 0.f && !autoContrast) {
for(int j = 0; j < H; ++j) {
for(int i = 0; i < W; ++i) {
blend[j][i] = amount;
}
}
} else {
if (autoContrast) {
constexpr float minLuminance = 2000.f;
constexpr float maxLuminance = 20000.f;
@@ -290,14 +347,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
if (minvar <= 1.f || pass == 1) {
if (pass == 0) {
// a variance <= 1 means we already found a flat region and can skip second pass
JaggedArray<float> Lum(tilesize, tilesize);
JaggedArray<float> Blend(tilesize, tilesize);
for (int i = 0; i < tilesize; ++i) {
for (int j = 0; j < tilesize; ++j) {
Lum[i][j] = luminance[i + minY][j + minX];
}
}
contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f;
contrastThreshold = calcContrastThreshold(luminance, minY, minX, tilesize);
break;
} else {
// in second pass we allow a variance of 4
@@ -339,20 +389,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
}
}
}
if (minvar <= 4.f) {
JaggedArray<float> Lum(tilesize, tilesize);
JaggedArray<float> Blend(tilesize, tilesize);
const int minY = topLeftYStart + minI;
const int minX = topLeftXStart + minJ;
for (int i = 0; i < tilesize; ++i) {
for (int j = 0; j < tilesize; ++j) {
Lum[i][j] = luminance[i + minY][j + minX];
}
}
contrastThreshold = calcContrastThreshold(Lum, Blend, tilesize, tilesize) / 100.f;
} else {
contrastThreshold = 0.f;
}
contrastThreshold = minvar <= 4.f ? calcContrastThreshold(luminance, topLeftYStart + minI, topLeftXStart + minJ, tilesize) : 0.f;
}
}
}
@@ -426,65 +463,6 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
gaussianBlur(blend, blend, W, H, 2.0);
}
}
}
}
/**
 * Determine a contrast threshold (in percent) for a W x H luminance plane.
 *
 * Step 1 writes, for every pixel at least 2 pixels away from the border, a scaled
 * gradient magnitude (neighbours at distance 1 and 2, horizontally and vertically)
 * to blend[y - 2][x - 2]. Step 2 tries the thresholds 0.01 .. 0.99 in ascending order
 * and stops at the first one for which the summed calcBlendFactor() over those values
 * is not larger than 1 % of their count.
 *
 * @param luminance row pointers of the luminance plane, H rows of W floats are read
 * @param blend     caller supplied scratch rows; rows 0 .. H - 5 and columns 0 .. W - 5
 *                  are overwritten
 * @param W         width of the plane
 * @param H         height of the plane
 * @return the accepted threshold multiplied by 100 (1 .. 99), or 100 when none of the
 *         tried thresholds was accepted
 */
int calcContrastThreshold(float** luminance, float **blend, int W, int H) {

    constexpr float scale = 0.0625f / 327.68f;

#ifdef __SSE2__
    const vfloat scalev = F2V(scale);
#endif

    const int innerW = W - 4;
    const int innerH = H - 4;

    for (int row = 0; row < innerH; ++row) {
        const int j = row + 2;
        const float* const up2 = luminance[j - 2];
        const float* const up1 = luminance[j - 1];
        const float* const cur = luminance[j];
        const float* const down1 = luminance[j + 1];
        const float* const down2 = luminance[j + 2];
        float* const out = blend[row];

        int i = 2;
#ifdef __SSE2__
        // four pixels per step; stops early enough that the widest load (i + 2 .. i + 5)
        // stays inside the row
        for (; i < W - 5; i += 4) {
            const vfloat dx1 = LVFU(cur[i + 1]) - LVFU(cur[i - 1]);
            const vfloat dy1 = LVFU(down1[i]) - LVFU(up1[i]);
            const vfloat dx2 = LVFU(cur[i + 2]) - LVFU(cur[i - 2]);
            const vfloat dy2 = LVFU(down2[i]) - LVFU(up2[i]);
            const vfloat contrastv = vsqrtf(SQRV(dx1) + SQRV(dy1) + SQRV(dx2) + SQRV(dy2)) * scalev;
            STVFU(out[i - 2], contrastv);
        }
#endif
        // remaining pixels of the row (all of them without SSE2)
        for (; i < W - 2; ++i) {
            const float dx1 = cur[i + 1] - cur[i - 1];
            const float dy1 = down1[i] - up1[i];
            const float dx2 = cur[i + 2] - cur[i - 2];
            const float dy2 = down2[i] - up2[i];
            out[i - 2] = sqrtf(rtengine::SQR(dx1) + rtengine::SQR(dy1) + rtengine::SQR(dx2) + rtengine::SQR(dy2)) * scale;
        }
    }

    // 1 % of the number of values written above
    const float limit = innerW * innerH / 100.f;

    int c = 1;
    while (c < 100) {
        const float contrastThreshold = c / 100.f;
        float sum = 0.f;
#ifdef __SSE2__
        const vfloat contrastThresholdv = F2V(contrastThreshold);
        vfloat sumv = ZEROV;
#endif
        for (int j = 0; j < innerH; ++j) {
            const float* const line = blend[j];
            int i = 0;
#ifdef __SSE2__
            for (; i < innerW - 3; i += 4) {
                sumv += calcBlendFactor(LVFU(line[i]), contrastThresholdv);
            }
#endif
            for (; i < innerW; ++i) {
                sum += calcBlendFactor(line[i], contrastThreshold);
            }
        }
#ifdef __SSE2__
        sum += vhadd(sumv);
#endif
        if (sum <= limit) {
            break;
        }
        ++c;
    }

    return c;
}
}

View File

@@ -25,5 +25,4 @@ namespace rtengine
{
// NOTE(review): the definition is not part of this view. Presumably reports, through
// minOut / maxOut, the values found at the minPrct / maxPrct percentiles of
// data[0 .. size) - confirm against the implementation. multiThread defaults to true.
void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multiThread = true);
// Fills the W x H plane `blend` from `luminance`. contrastThreshold is passed by
// reference: in the visible parts of the implementation it is overwritten with the
// result of calcContrastThreshold (or with 0.f) on the auto contrast path, so callers
// can read back the value that was used. amount defaults to 1.f, autoContrast to false.
void buildBlendMask(float** luminance, float **blend, int W, int H, float &contrastThreshold, float amount = 1.f, bool autoContrast = false);
// Returns the contrast threshold in percent (1 .. 100) for a W x H luminance plane;
// blend is used as scratch space and its rows 0 .. H - 5, columns 0 .. W - 5 are
// overwritten.
int calcContrastThreshold(float** luminance, float **blend, int W, int H);
}