SSE code for auto calculation of dual demosaic contrast threshold, #4866

This commit is contained in:
heckflosse
2018-10-17 13:36:41 +02:00
parent 5a30c7e147
commit bb0743898b

View File

@@ -194,8 +194,6 @@ void findMinMaxPercentile(const float* data, size_t size, float minPrct, float&
void buildBlendMask(float** luminance, float **blend, int W, int H, float contrastThreshold, float amount, bool autoContrast) { void buildBlendMask(float** luminance, float **blend, int W, int H, float contrastThreshold, float amount, bool autoContrast) {
constexpr float scale = 0.0625f / 327.68f;
if(contrastThreshold == 0.f) { if(contrastThreshold == 0.f) {
for(int j = 0; j < H; ++j) { for(int j = 0; j < H; ++j) {
for(int i = 0; i < W; ++i) { for(int i = 0; i < W; ++i) {
@@ -203,6 +201,7 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float contra
} }
} }
} else { } else {
constexpr float scale = 0.0625f / 327.68f;
if (autoContrast) { if (autoContrast) {
StopWatch StopC("calculate dual demosaic auto contrast threshold"); StopWatch StopC("calculate dual demosaic auto contrast threshold");
constexpr int tilesize = 80; constexpr int tilesize = 80;
@@ -215,25 +214,46 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float contra
int tileY = i * tilesize; int tileY = i * tilesize;
for (int j = 0; j < numTilesW; ++j) { for (int j = 0; j < numTilesW; ++j) {
int tileX = j * tilesize; int tileX = j * tilesize;
double avg = 0.; #ifdef __SSE2__
vfloat avgv = ZEROV;
for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; x += 4) {
avgv += LVFU(luminance[y][x]);
}
}
float avg = vhadd(avgv);
#else
float avg = 0.;
for (int y = tileY; y < tileY + tilesize; ++y) { for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; ++x) { for (int x = tileX; x < tileX + tilesize; ++x) {
avg += luminance[y][x]; avg += luminance[y][x];
} }
} }
#endif
avg /= SQR(tilesize); avg /= SQR(tilesize);
double var = 0.0; #ifdef __SSE2__
vfloat varv = ZEROV;
avgv = F2V(avg);
for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; x +=4) {
varv += SQRV(LVFU(luminance[y][x]) - avgv);
}
}
float var = vhadd(varv);
#else
float var = 0.0;
for (int y = tileY; y < tileY + tilesize; ++y) { for (int y = tileY; y < tileY + tilesize; ++y) {
for (int x = tileX; x < tileX + tilesize; ++x) { for (int x = tileX; x < tileX + tilesize; ++x) {
var += SQR(luminance[y][x] - avg); var += SQR(luminance[y][x] - avg);
} }
} }
#endif
var /= (SQR(tilesize) * avg); var /= (SQR(tilesize) * avg);
variances[i][j].first = var; variances[i][j].first = var;
variances[i][j].second = avg; variances[i][j].second = avg;
// std::cout << "y : " << tileY << " ; x : " << tileX << " ; avg : " << avg << " ; var : " << var << std::endl;
} }
} }
float minvar = RT_INFINITY_F; float minvar = RT_INFINITY_F;
int minY = 0, minX = 0; int minY = 0, minX = 0;
for (int i = 0; i < numTilesH; ++i) { for (int i = 0; i < numTilesH; ++i) {
@@ -256,7 +276,6 @@ void buildBlendMask(float** luminance, float **blend, int W, int H, float contra
Lum[i][j] = luminance[i + minY][j + minX]; Lum[i][j] = luminance[i + minY][j + minX];
} }
} }
// std::cout << "contrastThreshold : " << contrastThreshold << std::endl;
calcContrastThreshold(Lum, Blend, tilesize, tilesize); calcContrastThreshold(Lum, Blend, tilesize, tilesize);
} }