diff --git a/rtengine/badpixels.cc b/rtengine/badpixels.cc index 2710cb28d..0ae63a618 100644 --- a/rtengine/badpixels.cc +++ b/rtengine/badpixels.cc @@ -22,12 +22,39 @@ #include "pixelsmap.h" #include "rawimage.h" #include "rawimagesource.h" +//#define BENCHMARK +#include "StopWatch.h" namespace { unsigned fc(const unsigned int cfa[2][2], int r, int c) { return cfa[r & 1][c & 1]; } + +inline void sum5x5(const array2D& in, int col, float &sum) { +#ifdef __SSE2__ + // sum up 5*4 = 20 values using SSE + // 10 fabs function calls and 10 float additions with SSE + const vfloat sumv = (vabsf(LVFU(in[0][col])) + vabsf(LVFU(in[1][col]))) + + (vabsf(LVFU(in[2][col])) + vabsf(LVFU(in[3][col]))) + + vabsf(LVFU(in[4][col])); + // horizontally add the values and add the result to hfnbrave + sum += vhadd(sumv); + + // add remaining 5 values of last column + sum += (fabsf(in[0][col + 4]) + fabsf(in[1][col + 4])) + + (fabsf(in[2][col + 4]) + fabsf(in[3][col + 4])) + + fabsf(in[4][col + 4]); +#else + // 25 fabs function calls and 25 float additions without SSE + for (int nn = col; nn < col + 5; ++nn) { + sum += (fabsf(in[0][nn]) + fabsf(in[1][nn])) + + (fabsf(in[2][nn]) + fabsf(in[3][nn])) + + fabsf(in[4][nn]); + } +#endif + +} } namespace rtengine @@ -445,126 +472,124 @@ int RawImageSource::interpolateBadPixelsXtrans(const PixelsMap &bitmapBads) /* Search for hot or dead pixels in the image and update the map * For each pixel compare its value to the average of similar color surrounding * (Taken from Emil Martinec idea) - * (Optimized by Ingo Weyrich 2013 and 2015) - */ + * (Optimized by Ingo Weyrich 2013, 2015, and 2019) +*/ int RawImageSource::findHotDeadPixels(PixelsMap &bpMap, const float thresh, const bool findHotPixels, const bool findDeadPixels) const { + BENCHFUN const float varthresh = (20.0 * (thresh / 100.0) + 1.0) / 24.f; - // allocate temporary buffer - float* cfablur = new float[H * W]; - // counter for dead or hot pixels int counter = 0; #ifdef _OPENMP - #pragma omp parallel + #pragma omp parallel reduction(+:counter) #endif { + array2D cfablur(W, 5, ARRAY2D_CLEAR_DATA); + int firstRow = -1; + int lastRow = -1; + #ifdef _OPENMP - #pragma omp for schedule(dynamic,16) nowait + // note, static scheduling is important in this implementation + #pragma omp for schedule(static) nowait #endif - for (int i = 2; i < H - 2; i++) { - for (int j = 2; j < W - 2; j++) { + for (int i = 2; i < H - 2; ++i) { + if (firstRow == -1) { + firstRow = i; + if (firstRow > 2) { + for (int row = firstRow - 2; row < firstRow; ++row) { + const int destRow = row % 5; + for (int j = 2; j < W - 2; ++j) { + const float temp = median(rawData[row - 2][j - 2], rawData[row - 2][j], rawData[row - 2][j + 2], + rawData[row][j - 2], rawData[row][j], rawData[row][j + 2], + rawData[row + 2][j - 2], rawData[row + 2][j], rawData[row + 2][j + 2]); + cfablur[destRow][j] = rawData[row][j] - temp; + } + } + } + } + lastRow = i; + const int destRow = i % 5; + for (int j = 2; j < W - 2; ++j) { const float temp = median(rawData[i - 2][j - 2], rawData[i - 2][j], rawData[i - 2][j + 2], rawData[i][j - 2], rawData[i][j], rawData[i][j + 2], rawData[i + 2][j - 2], rawData[i + 2][j], rawData[i + 2][j + 2]); - cfablur[i * W + j] = rawData[i][j] - temp; + cfablur[destRow][j] = rawData[i][j] - temp; + } + + if (i - 1 > firstRow) { + const int rr = i - 2; + const int rr0 = rr % 5; + for (int cc = 2; cc < W - 2; ++cc) { + //evaluate pixel for heat/death + float pixdev = cfablur[rr0][cc]; + + if (!findDeadPixels && pixdev <= 0.f) { + continue; + } + + if (!findHotPixels && pixdev >= 0.f) { + continue; + } + + pixdev = fabsf(pixdev); + float hfnbrave = -pixdev; + sum5x5(cfablur, cc - 2, hfnbrave); + if (pixdev > varthresh * hfnbrave) { + // mark the pixel as "bad" + bpMap.set(cc, rr); + ++counter; + } + } //end of pixel evaluation } } - // process borders. Former version calculated the median using mirrored border which does not make sense because the original pixel loses weight - // Setting the difference between pixel and median for border pixels to zero should do the job not worse then former version -#ifdef _OPENMP - #pragma omp single -#endif - { - for (int i = 0; i < 2; ++i) { - for (int j = 0; j < W; ++j) { - cfablur[i * W + j] = 0.f; - } - } - - for (int i = 2; i < H - 2; ++i) { - for (int j = 0; j < 2; ++j) { - cfablur[i * W + j] = 0.f; - } - - for (int j = W - 2; j < W; ++j) { - cfablur[i * W + j] = 0.f; - } - } - - for (int i = H - 2; i < H; ++i) { - for (int j = 0; j < W; ++j) { - cfablur[i * W + j] = 0.f; - } - } - } - -#ifdef _OPENMP - #pragma omp barrier // barrier because of nowait clause above - - #pragma omp for reduction(+:counter) schedule(dynamic,16) -#endif - - //cfa pixel heat/death evaluation - for (int rr = 2; rr < H - 2; ++rr) { - for (int cc = 2, rrmWpcc = rr * W + 2; cc < W - 2; ++cc, ++rrmWpcc) { - //evaluate pixel for heat/death - float pixdev = cfablur[rrmWpcc]; - - if (pixdev == 0.f) { - continue; - } - - if ((!findDeadPixels) && pixdev < 0) { - continue; - } - - if ((!findHotPixels) && pixdev > 0) { - continue; - } - - pixdev = fabsf(pixdev); - float hfnbrave = -pixdev; - -#ifdef __SSE2__ - // sum up 5*4 = 20 values using SSE - // 10 fabs function calls and 10 float additions with SSE - vfloat sum = vabsf(LVFU(cfablur[(rr - 2) * W + cc - 2])) + vabsf(LVFU(cfablur[(rr - 1) * W + cc - 2])); - sum += vabsf(LVFU(cfablur[(rr) * W + cc - 2])); - sum += vabsf(LVFU(cfablur[(rr + 1) * W + cc - 2])); - sum += vabsf(LVFU(cfablur[(rr + 2) * W + cc - 2])); - // horizontally add the values and add the result to hfnbrave - hfnbrave += vhadd(sum); - - // add remaining 5 values of last column - for (int mm = rr - 2; mm <= rr + 2; ++mm) { - hfnbrave += fabsf(cfablur[mm * W + cc + 2]); - } - -#else - - // 25 fabs function calls and 25 float additions without SSE - for (int mm = rr - 2; mm <= rr + 2; ++mm) { - for (int nn = cc - 2; nn <= cc + 2; ++nn) { - hfnbrave += fabsf(cfablur[mm * W + nn]); + if (lastRow > 0 && lastRow < H - 2) { + //cfa pixel heat/death evaluation + for (int rr = lastRow - 1; rr < lastRow + 1; ++rr) { + const int i = rr + 2; + const int destRow = i % 5; + if (i >= H - 2) { + for (int j = 2; j < W - 2; j++) { + cfablur[destRow][j] = 0.f; + } + } else { + for (int j = 2; j < W - 2; ++j) { + const float temp = median(rawData[i - 2][j - 2], rawData[i - 2][j], rawData[i - 2][j + 2], + rawData[i][j - 2], rawData[i][j], rawData[i][j + 2], + rawData[i + 2][j - 2], rawData[i + 2][j], rawData[i + 2][j + 2]); + cfablur[destRow][j] = rawData[i][j] - temp; } } -#endif + const int rr0 = rr % 5; + for (int cc = 2; cc < W - 2; ++cc) { + //evaluate pixel for heat/death + float pixdev = cfablur[rr0][cc]; - if (pixdev > varthresh * hfnbrave) { - // mark the pixel as "bad" - bpMap.set(cc, rr); - counter++; - } - }//end of pixel evaluation + if (!findDeadPixels && pixdev <= 0.f) { + continue; + } + + if (!findHotPixels && pixdev >= 0.f) { + continue; + } + + pixdev = fabsf(pixdev); + float hfnbrave = -pixdev; + sum5x5(cfablur, cc - 2, hfnbrave); + if (pixdev > varthresh * hfnbrave) { + // mark the pixel as "bad" + bpMap.set(cc, rr); + ++counter; + } + }//end of pixel evaluation + } } }//end of parallel processing - delete [] cfablur; + return counter; }