Merge pull request #5531 from Beep6581/hotdeadpixel_speedup
findHotDeadPixels: speedup and reduced memory usage
This commit is contained in:
@@ -22,12 +22,39 @@
|
||||
#include "pixelsmap.h"
|
||||
#include "rawimage.h"
|
||||
#include "rawimagesource.h"
|
||||
//#define BENCHMARK
|
||||
#include "StopWatch.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
/**
 * Look up the colour-filter index of a sensor position in a 2x2 CFA pattern.
 *
 * @param cfa 2x2 table holding the colour index of each position of the repeating pattern
 * @param r   sensor row; only its lowest bit is used
 * @param c   sensor column; only its lowest bit is used
 * @return    cfa[r & 1][c & 1]
 */
unsigned fc(const unsigned int cfa[2][2], int r, int c) {
    // the pattern repeats every two rows/columns, so the parity of each coordinate selects the cell
    return cfa[r & 1][c & 1];
}
|
||||
|
||||
inline void sum5x5(const array2D<float>& in, int col, float &sum) {
|
||||
#ifdef __SSE2__
|
||||
// sum up 5*4 = 20 values using SSE
|
||||
// 10 fabs function calls and 10 float additions with SSE
|
||||
const vfloat sumv = (vabsf(LVFU(in[0][col])) + vabsf(LVFU(in[1][col]))) +
|
||||
(vabsf(LVFU(in[2][col])) + vabsf(LVFU(in[3][col]))) +
|
||||
vabsf(LVFU(in[4][col]));
|
||||
// horizontally add the values and add the result to hfnbrave
|
||||
sum += vhadd(sumv);
|
||||
|
||||
// add remaining 5 values of last column
|
||||
sum += (fabsf(in[0][col + 4]) + fabsf(in[1][col + 4])) +
|
||||
(fabsf(in[2][col + 4]) + fabsf(in[3][col + 4])) +
|
||||
fabsf(in[4][col + 4]);
|
||||
#else
|
||||
// 25 fabs function calls and 25 float additions without SSE
|
||||
for (int nn = col; nn < col + 5; ++nn) {
|
||||
sum += (fabsf(in[0][nn]) + fabsf(in[1][nn])) +
|
||||
(fabsf(in[2][nn]) + fabsf(in[3][nn])) +
|
||||
fabsf(in[4][nn]);
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
namespace rtengine
|
||||
@@ -445,126 +472,124 @@ int RawImageSource::interpolateBadPixelsXtrans(const PixelsMap &bitmapBads)
|
||||
/* Search for hot or dead pixels in the image and update the map
|
||||
* For each pixel compare its value to the average of similar color surrounding
|
||||
* (Taken from Emil Martinec idea)
|
||||
* (Optimized by Ingo Weyrich 2013 and 2015)
|
||||
*/
|
||||
* (Optimized by Ingo Weyrich 2013, 2015, and 2019)
|
||||
*/
|
||||
int RawImageSource::findHotDeadPixels(PixelsMap &bpMap, const float thresh, const bool findHotPixels, const bool findDeadPixels) const
|
||||
{
|
||||
BENCHFUN
|
||||
const float varthresh = (20.0 * (thresh / 100.0) + 1.0) / 24.f;
|
||||
|
||||
// allocate temporary buffer
|
||||
float* cfablur = new float[H * W];
|
||||
|
||||
// counter for dead or hot pixels
|
||||
int counter = 0;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#pragma omp parallel reduction(+:counter)
|
||||
#endif
|
||||
{
|
||||
array2D<float> cfablur(W, 5, ARRAY2D_CLEAR_DATA);
|
||||
int firstRow = -1;
|
||||
int lastRow = -1;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for schedule(dynamic,16) nowait
|
||||
// note, static scheduling is important in this implementation
|
||||
#pragma omp for schedule(static) nowait
|
||||
#endif
|
||||
|
||||
for (int i = 2; i < H - 2; i++) {
|
||||
for (int j = 2; j < W - 2; j++) {
|
||||
for (int i = 2; i < H - 2; ++i) {
|
||||
if (firstRow == -1) {
|
||||
firstRow = i;
|
||||
if (firstRow > 2) {
|
||||
for (int row = firstRow - 2; row < firstRow; ++row) {
|
||||
const int destRow = row % 5;
|
||||
for (int j = 2; j < W - 2; ++j) {
|
||||
const float temp = median(rawData[row - 2][j - 2], rawData[row - 2][j], rawData[row - 2][j + 2],
|
||||
rawData[row][j - 2], rawData[row][j], rawData[row][j + 2],
|
||||
rawData[row + 2][j - 2], rawData[row + 2][j], rawData[row + 2][j + 2]);
|
||||
cfablur[destRow][j] = rawData[row][j] - temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
lastRow = i;
|
||||
const int destRow = i % 5;
|
||||
for (int j = 2; j < W - 2; ++j) {
|
||||
const float temp = median(rawData[i - 2][j - 2], rawData[i - 2][j], rawData[i - 2][j + 2],
|
||||
rawData[i][j - 2], rawData[i][j], rawData[i][j + 2],
|
||||
rawData[i + 2][j - 2], rawData[i + 2][j], rawData[i + 2][j + 2]);
|
||||
cfablur[i * W + j] = rawData[i][j] - temp;
|
||||
cfablur[destRow][j] = rawData[i][j] - temp;
|
||||
}
|
||||
|
||||
if (i - 1 > firstRow) {
|
||||
const int rr = i - 2;
|
||||
const int rr0 = rr % 5;
|
||||
for (int cc = 2; cc < W - 2; ++cc) {
|
||||
//evaluate pixel for heat/death
|
||||
float pixdev = cfablur[rr0][cc];
|
||||
|
||||
if (!findDeadPixels && pixdev <= 0.f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!findHotPixels && pixdev >= 0.f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pixdev = fabsf(pixdev);
|
||||
float hfnbrave = -pixdev;
|
||||
sum5x5(cfablur, cc - 2, hfnbrave);
|
||||
if (pixdev > varthresh * hfnbrave) {
|
||||
// mark the pixel as "bad"
|
||||
bpMap.set(cc, rr);
|
||||
++counter;
|
||||
}
|
||||
} //end of pixel evaluation
|
||||
}
|
||||
}
|
||||
|
||||
// process borders. Former version calculated the median using mirrored border which does not make sense because the original pixel loses weight
|
||||
// Setting the difference between pixel and median for border pixels to zero should do the job not worse then former version
|
||||
#ifdef _OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
{
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
for (int j = 0; j < W; ++j) {
|
||||
cfablur[i * W + j] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 2; i < H - 2; ++i) {
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
cfablur[i * W + j] = 0.f;
|
||||
}
|
||||
|
||||
for (int j = W - 2; j < W; ++j) {
|
||||
cfablur[i * W + j] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = H - 2; i < H; ++i) {
|
||||
for (int j = 0; j < W; ++j) {
|
||||
cfablur[i * W + j] = 0.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp barrier // barrier because of nowait clause above
|
||||
|
||||
#pragma omp for reduction(+:counter) schedule(dynamic,16)
|
||||
#endif
|
||||
|
||||
//cfa pixel heat/death evaluation
|
||||
for (int rr = 2; rr < H - 2; ++rr) {
|
||||
for (int cc = 2, rrmWpcc = rr * W + 2; cc < W - 2; ++cc, ++rrmWpcc) {
|
||||
//evaluate pixel for heat/death
|
||||
float pixdev = cfablur[rrmWpcc];
|
||||
|
||||
if (pixdev == 0.f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((!findDeadPixels) && pixdev < 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((!findHotPixels) && pixdev > 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pixdev = fabsf(pixdev);
|
||||
float hfnbrave = -pixdev;
|
||||
|
||||
#ifdef __SSE2__
|
||||
// sum up 5*4 = 20 values using SSE
|
||||
// 10 fabs function calls and 10 float additions with SSE
|
||||
vfloat sum = vabsf(LVFU(cfablur[(rr - 2) * W + cc - 2])) + vabsf(LVFU(cfablur[(rr - 1) * W + cc - 2]));
|
||||
sum += vabsf(LVFU(cfablur[(rr) * W + cc - 2]));
|
||||
sum += vabsf(LVFU(cfablur[(rr + 1) * W + cc - 2]));
|
||||
sum += vabsf(LVFU(cfablur[(rr + 2) * W + cc - 2]));
|
||||
// horizontally add the values and add the result to hfnbrave
|
||||
hfnbrave += vhadd(sum);
|
||||
|
||||
// add remaining 5 values of last column
|
||||
for (int mm = rr - 2; mm <= rr + 2; ++mm) {
|
||||
hfnbrave += fabsf(cfablur[mm * W + cc + 2]);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// 25 fabs function calls and 25 float additions without SSE
|
||||
for (int mm = rr - 2; mm <= rr + 2; ++mm) {
|
||||
for (int nn = cc - 2; nn <= cc + 2; ++nn) {
|
||||
hfnbrave += fabsf(cfablur[mm * W + nn]);
|
||||
if (lastRow > 0 && lastRow < H - 2) {
|
||||
//cfa pixel heat/death evaluation
|
||||
for (int rr = lastRow - 1; rr < lastRow + 1; ++rr) {
|
||||
const int i = rr + 2;
|
||||
const int destRow = i % 5;
|
||||
if (i >= H - 2) {
|
||||
for (int j = 2; j < W - 2; j++) {
|
||||
cfablur[destRow][j] = 0.f;
|
||||
}
|
||||
} else {
|
||||
for (int j = 2; j < W - 2; ++j) {
|
||||
const float temp = median(rawData[i - 2][j - 2], rawData[i - 2][j], rawData[i - 2][j + 2],
|
||||
rawData[i][j - 2], rawData[i][j], rawData[i][j + 2],
|
||||
rawData[i + 2][j - 2], rawData[i + 2][j], rawData[i + 2][j + 2]);
|
||||
cfablur[destRow][j] = rawData[i][j] - temp;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
const int rr0 = rr % 5;
|
||||
for (int cc = 2; cc < W - 2; ++cc) {
|
||||
//evaluate pixel for heat/death
|
||||
float pixdev = cfablur[rr0][cc];
|
||||
|
||||
if (pixdev > varthresh * hfnbrave) {
|
||||
// mark the pixel as "bad"
|
||||
bpMap.set(cc, rr);
|
||||
counter++;
|
||||
}
|
||||
}//end of pixel evaluation
|
||||
if (!findDeadPixels && pixdev <= 0.f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!findHotPixels && pixdev >= 0.f) {
|
||||
continue;
|
||||
}
|
||||
|
||||
pixdev = fabsf(pixdev);
|
||||
float hfnbrave = -pixdev;
|
||||
sum5x5(cfablur, cc - 2, hfnbrave);
|
||||
if (pixdev > varthresh * hfnbrave) {
|
||||
// mark the pixel as "bad"
|
||||
bpMap.set(cc, rr);
|
||||
++counter;
|
||||
}
|
||||
}//end of pixel evaluation
|
||||
}
|
||||
}
|
||||
}//end of parallel processing
|
||||
delete [] cfablur;
|
||||
|
||||
return counter;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user