Small speedup and code cleanup for autocontrast calculation

This commit is contained in:
heckflosse
2018-12-04 23:13:20 +01:00
parent 75a34ef87e
commit 09c55ca6ec

View File

@@ -203,20 +203,19 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
} }
} }
} else { } else {
constexpr float scale = 0.0625f / 327.68f;
if (autoContrast) { if (autoContrast) {
for (int pass = 0; pass < 2; ++pass) { for (int pass = 0; pass < 2; ++pass) {
const int tilesize = 80 / (pass + 1); const int tilesize = 80 / (pass + 1);
const int skip = pass < 1 ? tilesize : tilesize / 4; const int skip = pass < 1 ? tilesize : tilesize / 4;
const int numTilesW = W / skip - 3 * pass; const int numTilesW = W / skip - 3 * pass;
const int numTilesH = H / skip - 3 * pass; const int numTilesH = H / skip - 3 * pass;
std::vector<std::vector<std::pair<float, float>>> variances(numTilesH, std::vector<std::pair<float, float>>(numTilesW)); std::vector<std::vector<float>> variances(numTilesH, std::vector<float>(numTilesW));
#pragma omp parallel for #pragma omp parallel for schedule(dynamic)
for (int i = 0; i < numTilesH; ++i) { for (int i = 0; i < numTilesH; ++i) {
int tileY = i * skip; const int tileY = i * skip;
for (int j = 0; j < numTilesW; ++j) { for (int j = 0; j < numTilesW; ++j) {
int tileX = j * skip; const int tileX = j * skip;
#ifdef __SSE2__ #ifdef __SSE2__
vfloat avgv = ZEROV; vfloat avgv = ZEROV;
for (int y = tileY; y < tileY + tilesize; ++y) { for (int y = tileY; y < tileY + tilesize; ++y) {
@@ -226,7 +225,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
} }
float avg = vhadd(avgv); float avg = vhadd(avgv);
#else #else
float avg = 0.; float avg = 0.f;
for (int y = tileY; y < tileY + tilesize; ++y) { for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; ++x) { for (int x = tileX; x < tileX + tilesize; ++x) {
avg += luminance[y][x]; avg += luminance[y][x];
@@ -234,6 +233,11 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
} }
#endif #endif
avg /= SQR(tilesize); avg /= SQR(tilesize);
if (avg < 2000.f || avg > 20000.f) {
// too dark or too bright => skip the tile
variances[i][j] = RT_INFINITY_F;
continue;
}
#ifdef __SSE2__ #ifdef __SSE2__
vfloat varv = ZEROV; vfloat varv = ZEROV;
avgv = F2V(avg); avgv = F2V(avg);
@@ -244,16 +248,15 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
} }
float var = vhadd(varv); float var = vhadd(varv);
#else #else
float var = 0.0; float var = 0.f;
for (int y = tileY; y < tileY + tilesize; ++y) { for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; ++x) { for (int x = tileX; x < tileX + tilesize; ++x) {
var += SQR(luminance[y][x] - avg); var += SQR(luminance[y][x] - avg);
} }
} }
#endif #endif
var /= (SQR(tilesize) * avg); var /= (SQR(tilesize) * avg);
variances[i][j].first = var; variances[i][j] = var;
variances[i][j].second = avg;
} }
} }
@@ -261,8 +264,8 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
int minI = 0, minJ = 0; int minI = 0, minJ = 0;
for (int i = 0; i < numTilesH; ++i) { for (int i = 0; i < numTilesH; ++i) {
for (int j = 0; j < numTilesW; ++j) { for (int j = 0; j < numTilesW; ++j) {
if (variances[i][j].first < minvar && variances[i][j].second > 2000.f && variances[i][j].second < 20000.f) { if (variances[i][j] < minvar) {
minvar = variances[i][j].first; minvar = variances[i][j];
minI = i; minI = i;
minJ = j; minJ = j;
} }
@@ -295,6 +298,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float &contr
} }
} }
} else { } else {
constexpr float scale = 0.0625f / 327.68f;
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel #pragma omp parallel
#endif #endif