Speed up false color correction: vectorized 3x3 median and caller-owned row buffers.

  * median.h: SORT3 is removed; NETWORKSORT4OF6 is reduced to MIDDLE4OF6, which
    only isolates the middle four values; vfloat variants VMIDDLE4OF6 and
    VMEDIAN7 are added.
  * rawimagesource.cc/.h: processFalseColorCorrectionThread receives its five
    row buffers from the caller; under __SSE2__ the I and Q channels are packed
    into lanes 0 and 1 of one vfloat and filtered together; the OpenMP parallel
    region is opened once around the step loop, with a barrier after each step.

NOTE(review): '#define BENCHMARK' and BENCHFUN are presumably timing
instrumentation from StopWatch.h and are enabled unconditionally here; confirm
that this is intended before merging.

diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc
index 8774ce9f0..936c3bd16 100644
--- a/rtengine/demosaic_algos.cc
+++ b/rtengine/demosaic_algos.cc
@@ -22,7 +22,6 @@
 #include "rawimagesource.h"
 #include "rawimagesource_i.h"
 #include "jaggedarray.h"
-#include "median.h"
 #include "rawimage.h"
 #include "mytime.h"
 #include "iccmatrices.h"
diff --git a/rtengine/median.h b/rtengine/median.h
index c2c969492..177d50336 100644
--- a/rtengine/median.h
+++ b/rtengine/median.h
@@ -18,17 +18,8 @@
  */
 #include "rt_math.h"
 
-#define SORT3(a1,a2,a3,b1,b2,b3) \
-    { \
-        b2 = min(a1,a2);\
-        b1 = min(b2,a3);\
-        b3 = max(a1,a2);\
-        b2 = max(b2, min(b3,a3));\
-        b3 = max(b3,a3);\
-    }
-
-
-#define NETWORKSORT4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
+// middle 4 of 6 elements: d1..d4 receive s0..s5 without their minimum and maximum (d1..d4 are not sorted)
+#define MIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
 {\
 d1 = min(s1,s2);\
 d2 = max(s1,s2);\
@@ -44,23 +35,32 @@ d5 = max(s3,d5);\
 d3 = temp;\
 temp = min(d3,d4);\
 d4 = max(d3,d4);\
-d3 = temp;\
-d3 = max(d0,d3);\
-temp = min(d1,d4);\
-d4 = max(d1,d4);\
-d1 = temp;\
+d3 = max(d0,temp);\
 d2 = min(d2,d5);\
-temp = min(d2,d4);\
-d4 = max(d2,d4);\
-d2 = temp;\
-temp = min(d1,d3);\
-d3 = max(d1,d3);\
-d1 = temp;\
-temp = min(d2,d3);\
-d3 = max(d2,d3);\
-d2 = temp;\
 }
 
+// middle 4 of 6 elements, same network as MIDDLE4OF6 built on vminf/vmaxf (results in d1..d4, not sorted)
+#define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
+{\
+d1 = vminf(s1,s2);\
+d2 = vmaxf(s1,s2);\
+d0 = vminf(s0,d2);\
+d2 = vmaxf(s0,d2);\
+temp = vminf(d0,d1);\
+d1 = vmaxf(d0,d1);\
+d0 = temp;\
+d4 = vminf(s4,s5);\
+d5 = vmaxf(s4,s5);\
+temp = vminf(s3,d5);\
+d5 = vmaxf(s3,d5);\
+d3 = temp;\
+temp = vminf(d3,d4);\
+d4 = vmaxf(d3,d4);\
+d3 = vmaxf(d0,temp);\
+d2 = vminf(d2,d5);\
+}
+
+
 #define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
 {\
 t0 = min(s0,s5);\
@@ -77,13 +77,36 @@ t5 = max(t3,t5);\
 t3 = median;\
 median = min(t2,t6);\
 t6 = max(t2,t6);\
-t2 = median;\
-t3 = max(t2,t3);\
+t3 = max(median,t3);\
 t3 = min(t3,t6);\
 t4 = min(t4,t5);\
 median = min(t1,t4);\
 t4 = max(t1,t4);\
-t1 = median;\
-t3 = max(t1,t3);\
+t3 = max(median,t3);\
 median = min(t3,t4);\
 }
+
+#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
+{\
+t0 = vminf(s0,s5);\
+t5 = vmaxf(s0,s5);\
+t3 = vmaxf(t0,s3);\
+t0 = vminf(t0,s3);\
+t1 = vminf(s1,s6);\
+t6 = vmaxf(s1,s6);\
+t2 = vminf(s2,s4);\
+t4 = vmaxf(s2,s4);\
+t1 = vmaxf(t0,t1);\
+median = vminf(t3,t5);\
+t5 = vmaxf(t3,t5);\
+t3 = median;\
+median = vminf(t2,t6);\
+t6 = vmaxf(t2,t6);\
+t3 = vmaxf(median,t3);\
+t3 = vminf(t3,t6);\
+t4 = vminf(t4,t5);\
+median = vminf(t1,t4);\
+t4 = vmaxf(t1,t4);\
+t3 = vmaxf(median,t3);\
+median = vminf(t3,t4);\
+}
diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc
index c18bc6c8a..4cb7925bb 100644
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@@ -33,6 +33,8 @@
 #include "dcp.h"
 #include "rt_math.h"
 #include "improcfun.h"
+#define BENCHMARK
+#include "StopWatch.h"
 #ifdef _OPENMP
 #include <omp.h>
 #endif
@@ -3387,20 +3389,24 @@ int RawImageSource::defTransform (int tran)
 
 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 // Thread called part
-void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to)
+void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to)
 {
 
     int W = im->width;
 
-    array2D<float> rbconv_Y (W, 3);
-    array2D<float> rbconv_I (W, 3);
-    array2D<float> rbconv_Q (W, 3);
-    array2D<float> rbout_I (W, 3);
-    array2D<float> rbout_Q (W, 3);
-
     float row_I[W];
     float row_Q[W];
 
+#ifdef __SSE2__
+    vfloat buffer[12];
+    vfloat* pre1 = &buffer[0];
+    vfloat* pre2 = &buffer[3];
+    vfloat* post1 = &buffer[6];
+    vfloat* post2 = &buffer[9];
+
+    vfloat middle[6];
+
+#else
     float buffer[12];
     float* pre1 = &buffer[0];
     float* pre2 = &buffer[3];
@@ -3408,6 +3414,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
     float* post2 = &buffer[9];
 
     float middle[6];
+#endif
 
     int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0;
 
@@ -3427,23 +3434,56 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
 
         convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W);
 
+#ifdef __SSE2__
+        pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0);
+        pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0) , pre2[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0), pre2[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0);
+        vfloat temp[7];
+
+        // fill first element in rbout_I and rbout_Q
+        rbout_I[cx][0] = rbconv_I[cx][0];
+        rbout_Q[cx][0] = rbconv_Q[cx][0];
+
+        // median of I and Q channels together (lane 0 = I, lane 1 = Q)
+        for (int j = 1; j < W - 2; j += 2) {
+            post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0), post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0), post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0);
+            VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
+            vfloat medianval;
+            VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
+            rbout_I[cx][j] = medianval[0];
+            rbout_Q[cx][j] = medianval[1];
+            post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0), post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0), post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0);
+            VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
+            rbout_I[cx][j + 1] = medianval[0];
+            rbout_Q[cx][j + 1] = medianval[1];
+            std::swap(pre1, post1);
+            std::swap(pre2, post2);
+        }
+
+        // fill last elements in rbout_I and rbout_Q
+        rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
+        rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
+        rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
+        rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
+
+#else
         pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0];
         pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1];
-
         float temp[7];
 
         // fill first element in rbout_I
         rbout_I[cx][0] = rbconv_I[cx][0];
+
         // median I channel
         for (int j = 1; j < W - 2; j += 2) {
             post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1];
-            NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
+            MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
             MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]);
             post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2];
             MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]);
-            std::swap(pre1,post1);
-            std::swap(pre2,post2);
+            std::swap(pre1, post1);
+            std::swap(pre2, post2);
         }
+
         // fill last elements in rbout_I
         rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
         rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
@@ -3453,19 +3493,22 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
 
         // fill first element in rbout_Q
         rbout_Q[cx][0] = rbconv_Q[cx][0];
+
         // median Q channel
         for (int j = 1; j < W - 2; j += 2) {
             post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1];
-            NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
+            MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
             MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]);
             post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2];
             MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]);
-            std::swap(pre1,post1);
-            std::swap(pre2,post2);
+            std::swap(pre1, post1);
+            std::swap(pre2, post2);
         }
+
         // fill last elements in rbout_Q
         rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
         rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
+#endif
 
         // blur i-1th row
         if (i > row_from) {
@@ -3504,29 +3547,39 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
 // correction_YIQ_LQ
 void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps)
 {
+    BENCHFUN
 
     if (im->height < 4 || steps < 1) {
         return;
     }
 
-    for (int t = 0; t < steps; t++) {
 #ifdef _OPENMP
-        #pragma omp parallel
-        {
-            int tid = omp_get_thread_num();
-            int nthreads = omp_get_num_threads();
-            int blk = (im->height - 2) / nthreads;
+    #pragma omp parallel
+    {
+        multi_array2D<float, 5> buffer (W, 3);
+        int tid = omp_get_thread_num();
+        int nthreads = omp_get_num_threads();
+        int blk = (im->height - 2) / nthreads;
 
-            if (tid < nthreads - 1)
-            {
-                processFalseColorCorrectionThread (im, 1 + tid * blk, 1 + (tid + 1)*blk);
-            } else
-            { processFalseColorCorrectionThread (im, 1 + tid * blk, im->height - 1); }
+        for (int t = 0; t < steps; t++) {
+
+            if (tid < nthreads - 1) {
+                processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, 1 + (tid + 1)*blk);
+            } else {
+                processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, im->height - 1);
+            }
+
+            #pragma omp barrier
         }
-#else
-        processFalseColorCorrectionThread (im, 1 , im->height - 1);
-#endif
     }
+#else
+    multi_array2D<float, 5> buffer (W, 3);
+
+    for (int t = 0; t < steps; t++) {
+        processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 , im->height - 1);
+    }
+
+#endif
 }
 
 // Some camera input profiles need gamma preprocessing
diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h
index 35da831a0..fad7e749e 100644
--- a/rtengine/rawimagesource.h
+++ b/rtengine/rawimagesource.h
@@ -95,7 +95,7 @@ protected:
     void hphd_vertical (float** hpmap, int col_from, int col_to);
     void hphd_horizontal (float** hpmap, int row_from, int row_to);
     void hphd_green (float** hpmap);
-    void processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to);
+    void processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to);
     void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax);
     int defTransform (int tran);
     void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw);