From 4fdcfbd8df7b6d7183fcdb8f50ae88fa036bc651 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sat, 12 Mar 2016 00:11:21 +0100 Subject: [PATCH 1/6] 10% speedup for raw false colour suppression --- rtengine/rawimagesource.cc | 84 ++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 0125f1e63..c18bc6c8a 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -3401,19 +3401,13 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i float row_I[W]; float row_Q[W]; - float buffer[3 * 8]; - float* pre1_I = &buffer[0]; - float* pre2_I = &buffer[3]; - float* post1_I = &buffer[6]; - float* post2_I = &buffer[9]; - float* pre1_Q = &buffer[12]; - float* pre2_Q = &buffer[15]; - float* post1_Q = &buffer[18]; - float* post2_Q = &buffer[21]; + float buffer[12]; + float* pre1 = &buffer[0]; + float* pre2 = &buffer[3]; + float* post1 = &buffer[6]; + float* post2 = &buffer[9]; - float middle_I[6]; - float middle_Q[6]; - float* tmp; + float middle[6]; int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0; @@ -3433,49 +3427,43 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W); - SORT3(rbconv_I[px][0], rbconv_I[cx][0], rbconv_I[nx][0], pre1_I[0], pre1_I[1], pre1_I[2]); - SORT3(rbconv_I[px][1], rbconv_I[cx][1], rbconv_I[nx][1], pre2_I[0], pre2_I[1], pre2_I[2]); - SORT3(rbconv_Q[px][0], rbconv_Q[cx][0], rbconv_Q[nx][0], pre1_Q[0], pre1_Q[1], pre1_Q[2]); - SORT3(rbconv_Q[px][1], rbconv_Q[cx][1], rbconv_Q[nx][1], pre2_Q[0], pre2_Q[1], pre2_Q[2]); + pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0]; + pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1]; - // median I channel float temp[7]; - for (int j = 1; j < W - 2; j += 2) { - SORT3(rbconv_I[px][j + 1], rbconv_I[cx][j + 1], rbconv_I[nx][j + 1], post1_I[0], post1_I[1], post1_I[2]); - NETWORKSORT4OF6(pre2_I[0], pre2_I[1], pre2_I[2], post1_I[0], post1_I[1], post1_I[2], middle_I[0], middle_I[1], middle_I[2], middle_I[3], middle_I[4], middle_I[5], temp[0]); - SORT3(rbconv_I[px][j + 2], rbconv_I[cx][j + 2], rbconv_I[nx][j + 2], post2_I[0], post2_I[1], post2_I[2]); - MEDIAN7(pre1_I[0], pre1_I[1], pre1_I[2], middle_I[1], middle_I[2], middle_I[3], middle_I[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]); - MEDIAN7(post2_I[0], post2_I[1], post2_I[2], middle_I[1], middle_I[2], middle_I[3], middle_I[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]); - tmp = pre1_I; - pre1_I = post1_I; - post1_I = tmp; - tmp = pre2_I; - pre2_I = post2_I; - post2_I = tmp; - - } - - // median Q channel - for (int j = 1; j < W - 2; j += 2) { - SORT3(rbconv_Q[px][j + 1], rbconv_Q[cx][j + 1], rbconv_Q[nx][j + 1], post1_Q[0], post1_Q[1], post1_Q[2]); - NETWORKSORT4OF6(pre2_Q[0], pre2_Q[1], pre2_Q[2], post1_Q[0], post1_Q[1], post1_Q[2], middle_Q[0], middle_Q[1], middle_Q[2], middle_Q[3], middle_Q[4], middle_Q[5], temp[0]); - SORT3(rbconv_Q[px][j + 2], rbconv_Q[cx][j + 2], rbconv_Q[nx][j + 2], post2_Q[0], post2_Q[1], post2_Q[2]); - MEDIAN7(pre1_Q[0], pre1_Q[1], pre1_Q[2], middle_Q[1], middle_Q[2], middle_Q[3], middle_Q[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]); - MEDIAN7(post2_Q[0], post2_Q[1], post2_Q[2], middle_Q[1], middle_Q[2], middle_Q[3], middle_Q[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]); - tmp = pre1_Q; - pre1_Q = post1_Q; - post1_Q = tmp; - tmp = pre2_Q; - pre2_Q = post2_Q; - post2_Q = tmp; - } - - // fill first and last element in rbout + // fill first element in rbout_I rbout_I[cx][0] = rbconv_I[cx][0]; + // median I channel + for (int j = 1; j < W - 2; j += 2) { + post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1]; + NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); + MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]); + post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2]; + MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]); + std::swap(pre1,post1); + std::swap(pre2,post2); + } + // fill last elements in rbout_I rbout_I[cx][W - 1] = rbconv_I[cx][W - 1]; rbout_I[cx][W - 2] = rbconv_I[cx][W - 2]; + + pre1[0] = rbconv_Q[px][0], pre1[1] = rbconv_Q[cx][0], pre1[2] = rbconv_Q[nx][0]; + pre2[0] = rbconv_Q[px][1], pre2[1] = rbconv_Q[cx][1], pre2[2] = rbconv_Q[nx][1]; + + // fill first element in rbout_Q rbout_Q[cx][0] = rbconv_Q[cx][0]; + // median Q channel + for (int j = 1; j < W - 2; j += 2) { + post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1]; + NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); + MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]); + post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2]; + MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]); + std::swap(pre1,post1); + std::swap(pre2,post2); + } + // fill last elements in rbout_Q rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1]; rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2]; From a55167bde8fb290ef4f6711e2d6f0e64455d1a26 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sat, 12 Mar 2016 23:32:25 +0100 Subject: [PATCH 2/6] 24% speedup for raw false colour suppression --- rtengine/demosaic_algos.cc | 1 - rtengine/median.h | 81 +++++++++++++++++---------- rtengine/rawimagesource.cc | 109 +++++++++++++++++++++++++++---------- rtengine/rawimagesource.h | 2 +- 4 files changed, 134 insertions(+), 59 deletions(-) diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index 8774ce9f0..936c3bd16 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -22,7 +22,6 @@ #include "rawimagesource.h" #include "rawimagesource_i.h" #include "jaggedarray.h" -#include "median.h" #include "rawimage.h" #include "mytime.h" #include "iccmatrices.h" diff --git a/rtengine/median.h b/rtengine/median.h index c2c969492..177d50336 100644 --- a/rtengine/median.h +++ b/rtengine/median.h @@ -18,17 +18,8 @@ */ #include "rt_math.h" -#define SORT3(a1,a2,a3,b1,b2,b3) \ - { \ - b2 = min(a1,a2);\ - b1 = min(b2,a3);\ - b3 = max(a1,a2);\ - b2 = max(b2, min(b3,a3));\ - b3 = max(b3,a3);\ - } - - -#define NETWORKSORT4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \ +// middle 4 of 6 elements, +#define MIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \ {\ d1 = min(s1,s2);\ d2 = max(s1,s2);\ @@ -44,23 +35,32 @@ d5 = max(s3,d5);\ d3 = temp;\ temp = min(d3,d4);\ d4 = max(d3,d4);\ -d3 = temp;\ -d3 = max(d0,d3);\ -temp = min(d1,d4);\ -d4 = max(d1,d4);\ -d1 = temp;\ +d3 = max(d0,temp);\ d2 = min(d2,d5);\ -temp = min(d2,d4);\ -d4 = max(d2,d4);\ -d2 = temp;\ -temp = min(d1,d3);\ -d3 = max(d1,d3);\ -d1 = temp;\ -temp = min(d2,d3);\ -d3 = max(d2,d3);\ -d2 = temp;\ } +// middle 4 of 6 elements, +#define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \ +{\ +d1 = vminf(s1,s2);\ +d2 = vmaxf(s1,s2);\ +d0 = vminf(s0,d2);\ +d2 = vmaxf(s0,d2);\ +temp = vminf(d0,d1);\ +d1 = vmaxf(d0,d1);\ +d0 = temp;\ +d4 = vminf(s4,s5);\ +d5 = vmaxf(s4,s5);\ +temp = vminf(s3,d5);\ +d5 = vmaxf(s3,d5);\ +d3 = temp;\ +temp = vminf(d3,d4);\ +d4 = vmaxf(d3,d4);\ +d3 = vmaxf(d0,temp);\ +d2 = vminf(d2,d5);\ +} + + #define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \ {\ t0 = min(s0,s5);\ @@ -77,13 +77,36 @@ t5 = max(t3,t5);\ t3 = median;\ median = min(t2,t6);\ t6 = max(t2,t6);\ -t2 = median;\ -t3 = max(t2,t3);\ +t3 = max(median,t3);\ t3 = min(t3,t6);\ t4 = min(t4,t5);\ median = min(t1,t4);\ t4 = max(t1,t4);\ -t1 = median;\ -t3 = max(t1,t3);\ +t3 = max(median,t3);\ median = min(t3,t4);\ } + +#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \ +{\ +t0 = vminf(s0,s5);\ +t5 = vmaxf(s0,s5);\ +t3 = vmaxf(t0,s3);\ +t0 = vminf(t0,s3);\ +t1 = vminf(s1,s6);\ +t6 = vmaxf(s1,s6);\ +t2 = vminf(s2,s4);\ +t4 = vmaxf(s2,s4);\ +t1 = vmaxf(t0,t1);\ +median = vminf(t3,t5);\ +t5 = vmaxf(t3,t5);\ +t3 = median;\ +median = vminf(t2,t6);\ +t6 = vmaxf(t2,t6);\ +t3 = vmaxf(median,t3);\ +t3 = vminf(t3,t6);\ +t4 = vminf(t4,t5);\ +median = vminf(t1,t4);\ +t4 = vmaxf(t1,t4);\ +t3 = vmaxf(median,t3);\ +median = vminf(t3,t4);\ +} diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index c18bc6c8a..4cb7925bb 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -33,6 +33,8 @@ #include "dcp.h" #include "rt_math.h" #include "improcfun.h" +#define BENCHMARK +#include "StopWatch.h" #ifdef _OPENMP #include #endif @@ -3387,20 +3389,24 @@ int RawImageSource::defTransform (int tran) //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Thread called part -void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to) +void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D &rbconv_Y, array2D &rbconv_I, array2D &rbconv_Q, array2D &rbout_I, array2D &rbout_Q, const int row_from, const int row_to) { int W = im->width; - array2D rbconv_Y (W, 3); - array2D rbconv_I (W, 3); - array2D rbconv_Q (W, 3); - array2D rbout_I (W, 3); - array2D rbout_Q (W, 3); - float row_I[W]; float row_Q[W]; +#ifdef __SSE2__ + vfloat buffer[12]; + vfloat* pre1 = &buffer[0]; + vfloat* pre2 = &buffer[3]; + vfloat* post1 = &buffer[6]; + vfloat* post2 = &buffer[9]; + + vfloat middle[6]; + +#else float buffer[12]; float* pre1 = &buffer[0]; float* pre2 = &buffer[3]; @@ -3408,6 +3414,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i float* post2 = &buffer[9]; float middle[6]; +#endif int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0; @@ -3427,23 +3434,56 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W); +#ifdef __SSE2__ + pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0); + pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0); + vfloat temp[7]; + + // fill first element in rbout_I and rbout_Q + rbout_I[cx][0] = rbconv_I[cx][0]; + rbout_Q[cx][0] = rbconv_Q[cx][0]; + + // median I channel + for (int j = 1; j < W - 2; j += 2) { + post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0), post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0), post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0); + VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); + vfloat medianval; + VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval); + rbout_I[cx][j] = medianval[0]; + rbout_Q[cx][j] = medianval[1]; + post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0), post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0), post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0); + VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval); + rbout_I[cx][j + 1] = medianval[0]; + rbout_Q[cx][j + 1] = medianval[1]; + std::swap(pre1, post1); + std::swap(pre2, post2); + } + + // fill last elements in rbout_I and rbout_Q + rbout_I[cx][W - 1] = rbconv_I[cx][W - 1]; + rbout_I[cx][W - 2] = rbconv_I[cx][W - 2]; + rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1]; + rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2]; + +#else pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0]; pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1]; - float temp[7]; // fill first element in rbout_I rbout_I[cx][0] = rbconv_I[cx][0]; + // median I channel for (int j = 1; j < W - 2; j += 2) { post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1]; - NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); + MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]); post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2]; MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]); - std::swap(pre1,post1); - std::swap(pre2,post2); + std::swap(pre1, post1); + std::swap(pre2, post2); } + // fill last elements in rbout_I rbout_I[cx][W - 1] = rbconv_I[cx][W - 1]; rbout_I[cx][W - 2] = rbconv_I[cx][W - 2]; @@ -3453,19 +3493,22 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i // fill first element in rbout_Q rbout_Q[cx][0] = rbconv_Q[cx][0]; + // median Q channel for (int j = 1; j < W - 2; j += 2) { post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1]; - NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); + MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]); post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2]; MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]); - std::swap(pre1,post1); - std::swap(pre2,post2); + std::swap(pre1, post1); + std::swap(pre2, post2); } + // fill last elements in rbout_Q rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1]; rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2]; +#endif // blur i-1th row if (i > row_from) { @@ -3504,29 +3547,39 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i // correction_YIQ_LQ void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps) { + BENCHFUN if (im->height < 4 || steps < 1) { return; } - for (int t = 0; t < steps; t++) { #ifdef _OPENMP - #pragma omp parallel - { - int tid = omp_get_thread_num(); - int nthreads = omp_get_num_threads(); - int blk = (im->height - 2) / nthreads; + #pragma omp parallel + { + multi_array2D buffer (W, 3); + int tid = omp_get_thread_num(); + int nthreads = omp_get_num_threads(); + int blk = (im->height - 2) / nthreads; - if (tid < nthreads - 1) - { - processFalseColorCorrectionThread (im, 1 + tid * blk, 1 + (tid + 1)*blk); - } else - { processFalseColorCorrectionThread (im, 1 + tid * blk, im->height - 1); } + for (int t = 0; t < steps; t++) { + + if (tid < nthreads - 1) { + processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, 1 + (tid + 1)*blk); + } else { + processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, im->height - 1); + } + + #pragma omp barrier } -#else - processFalseColorCorrectionThread (im, 1 , im->height - 1); -#endif } +#else + multi_array2D buffer (W, 3); + + for (int t = 0; t < steps; t++) { + processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 , im->height - 1); + } + +#endif } // Some camera input profiles need gamma preprocessing diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index 35da831a0..fad7e749e 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -95,7 +95,7 @@ protected: void hphd_vertical (float** hpmap, int col_from, int col_to); void hphd_horizontal (float** hpmap, int row_from, int row_to); void hphd_green (float** hpmap); - void processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to); + void processFalseColorCorrectionThread (Imagefloat* im, array2D &rbconv_Y, array2D &rbconv_I, array2D &rbconv_Q, array2D &rbout_I, array2D &rbout_Q, const int row_from, const int row_to); void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax); int defTransform (int tran); void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw); From 8c4c8ac175e98669985adadc7843f660daa18e25 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 13 Mar 2016 13:49:46 +0100 Subject: [PATCH 3/6] One less operation in MIDDLE4OF6 --- rtengine/median.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/rtengine/median.h b/rtengine/median.h index 177d50336..d5e88d9de 100644 --- a/rtengine/median.h +++ b/rtengine/median.h @@ -30,16 +30,15 @@ d1 = max(d0,d1);\ d0 = temp;\ d4 = min(s4,s5);\ d5 = max(s4,s5);\ -temp = min(s3,d5);\ +d3 = min(s3,d5);\ d5 = max(s3,d5);\ -d3 = temp;\ temp = min(d3,d4);\ d4 = max(d3,d4);\ d3 = max(d0,temp);\ d2 = min(d2,d5);\ } -// middle 4 of 6 elements, +// middle 4 of 6 elements, vectorized #define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \ {\ d1 = vminf(s1,s2);\ @@ -51,9 +50,8 @@ d1 = vmaxf(d0,d1);\ d0 = temp;\ d4 = vminf(s4,s5);\ d5 = vmaxf(s4,s5);\ -temp = vminf(s3,d5);\ +d3 = vminf(s3,d5);\ d5 = vmaxf(s3,d5);\ -d3 = temp;\ temp = vminf(d3,d4);\ d4 = vmaxf(d3,d4);\ d3 = vmaxf(d0,temp);\ From 9b2e19717201ec020feed11a3199c1c8c269c7fa Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 13 Mar 2016 14:25:33 +0100 Subject: [PATCH 4/6] Fixed copy/paste bug in processFalseColorCorrectionThread --- rtengine/rawimagesource.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 4cb7925bb..39ed0a01d 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -3436,7 +3436,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D #ifdef __SSE2__ pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0); - pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0); + pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0) , pre2[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0), pre2[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0); vfloat temp[7]; // fill first element in rbout_I and rbout_Q From c200c266c050194de90329d59c911a078f0064f0 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 13 Mar 2016 17:15:31 +0100 Subject: [PATCH 5/6] 5% speedup for raw false colour suppression --- rtengine/rawimagesource.cc | 35 +++++++++++++++++------------------ rtengine/rawimagesource.h | 1 + rtengine/rawimagesource_i.h | 7 +++++++ 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 39ed0a01d..f7f4ec36e 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -3392,10 +3392,8 @@ int RawImageSource::defTransform (int tran) void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D &rbconv_Y, array2D &rbconv_I, array2D &rbconv_Q, array2D &rbout_I, array2D &rbout_Q, const int row_from, const int row_to) { - int W = im->width; - - float row_I[W]; - float row_Q[W]; + const int W = im->width; + constexpr float onebynine = 1.f / 9.f; #ifdef __SSE2__ vfloat buffer[12]; @@ -3512,34 +3510,35 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D // blur i-1th row if (i > row_from) { + convert_to_RGB (im->r(i - 1, 0), im->g(i - 1, 0), im->b(i - 1, 0), rbconv_Y[px][0], rbout_I[px][0], rbout_Q[px][0]); + #ifdef _OPENMP #pragma omp simd #endif for (int j = 1; j < W - 1; j++) { - row_I[j] = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbout_I[nx][j - 1] + rbout_I[nx][j] + rbout_I[nx][j + 1]) / 9; - row_Q[j] = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbout_Q[nx][j - 1] + rbout_Q[nx][j] + rbout_Q[nx][j + 1]) / 9; + float I = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbout_I[nx][j - 1] + rbout_I[nx][j] + rbout_I[nx][j + 1]) * onebynine; + float Q = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbout_Q[nx][j - 1] + rbout_Q[nx][j] + rbout_Q[nx][j + 1]) * onebynine; + convert_to_RGB (im->r(i - 1, j), im->g(i - 1, j), im->b(i - 1, j), rbconv_Y[px][j], I, Q); } - row_I[0] = rbout_I[px][0]; - row_Q[0] = rbout_Q[px][0]; - row_I[W - 1] = rbout_I[px][W - 1]; - row_Q[W - 1] = rbout_Q[px][W - 1]; - convert_row_to_RGB (im->r(i - 1), im->g(i - 1), im->b(i - 1), rbconv_Y[px], row_I, row_Q, W); + convert_to_RGB (im->r(i - 1, W - 1), im->g(i - 1, W - 1), im->b(i - 1, W - 1), rbconv_Y[px][W - 1], rbout_I[px][W - 1], rbout_Q[px][W - 1]); } } // blur last 3 row and finalize H-1th row + convert_to_RGB (im->r(row_to - 1, 0), im->g(row_to - 1, 0), im->b(row_to - 1, 0), rbconv_Y[cx][0], rbout_I[cx][0], rbout_Q[cx][0]); +#ifdef _OPENMP + #pragma omp simd +#endif + for (int j = 1; j < W - 1; j++) { - row_I[j] = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbconv_I[nx][j - 1] + rbconv_I[nx][j] + rbconv_I[nx][j + 1]) / 9; - row_Q[j] = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbconv_Q[nx][j - 1] + rbconv_Q[nx][j] + rbconv_Q[nx][j + 1]) / 9; + float I = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbconv_I[nx][j - 1] + rbconv_I[nx][j] + rbconv_I[nx][j + 1]) * onebynine; + float Q = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbconv_Q[nx][j - 1] + rbconv_Q[nx][j] + rbconv_Q[nx][j + 1]) * onebynine; + convert_to_RGB (im->r(row_to - 1, j), im->g(row_to - 1, j), im->b(row_to - 1, j), rbconv_Y[cx][j], I, Q); } - row_I[0] = rbout_I[cx][0]; - row_Q[0] = rbout_Q[cx][0]; - row_I[W - 1] = rbout_I[cx][W - 1]; - row_Q[W - 1] = rbout_Q[cx][W - 1]; - convert_row_to_RGB (im->r(row_to - 1), im->g(row_to - 1), im->b(row_to - 1), rbconv_Y[cx], row_I, row_Q, W); + convert_to_RGB (im->r(row_to - 1, W - 1), im->g(row_to - 1, W - 1), im->b(row_to - 1, W - 1), rbconv_Y[cx][W - 1], rbout_I[cx][W - 1], rbout_Q[cx][W - 1]); } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index fad7e749e..567c78442 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -209,6 +209,7 @@ protected: void processFalseColorCorrection (Imagefloat* i, const int steps); inline void convert_row_to_YIQ (const float* const r, const float* const g, const float* const b, float* Y, float* I, float* Q, const int W); inline void convert_row_to_RGB (float* r, float* g, float* b, const float* const Y, const float* const I, const float* const Q, const int W); + inline void convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q); inline void convert_to_cielab_row (float* ar, float* ag, float* ab, float* oL, float* oa, float* ob); inline void interpolate_row_g (float* agh, float* agv, int i); diff --git a/rtengine/rawimagesource_i.h b/rtengine/rawimagesource_i.h index 83e834924..17bc8368a 100644 --- a/rtengine/rawimagesource_i.h +++ b/rtengine/rawimagesource_i.h @@ -51,6 +51,13 @@ inline void RawImageSource::convert_row_to_RGB (float* r, float* g, float* b, co } } +inline void RawImageSource::convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q) +{ + r = Y + 0.956f * I + 0.621f * Q; + g = Y - 0.272f * I - 0.647f * Q; + b = Y - 1.105f * I + 1.702f * Q; +} + inline void RawImageSource::convert_to_cielab_row (float* ar, float* ag, float* ab, float* oL, float* oa, float* ob) { From 6434460efb00a4e473a86c9665543cb505a6b795 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sun, 13 Mar 2016 23:23:32 +0100 Subject: [PATCH 6/6] removed benchmark code from raw false colour suppression --- rtengine/rawimagesource.cc | 3 --- rtengine/rawimagesource.h | 2 +- rtengine/rawimagesource_i.h | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index f7f4ec36e..29481fa58 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -33,8 +33,6 @@ #include "dcp.h" #include "rt_math.h" #include "improcfun.h" -#define BENCHMARK -#include "StopWatch.h" #ifdef _OPENMP #include #endif @@ -3546,7 +3544,6 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D // correction_YIQ_LQ void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps) { - BENCHFUN if (im->height < 4 || steps < 1) { return; diff --git a/rtengine/rawimagesource.h b/rtengine/rawimagesource.h index 567c78442..124aa22d9 100644 --- a/rtengine/rawimagesource.h +++ b/rtengine/rawimagesource.h @@ -209,7 +209,7 @@ protected: void processFalseColorCorrection (Imagefloat* i, const int steps); inline void convert_row_to_YIQ (const float* const r, const float* const g, const float* const b, float* Y, float* I, float* Q, const int W); inline void convert_row_to_RGB (float* r, float* g, float* b, const float* const Y, const float* const I, const float* const Q, const int W); - inline void convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q); + inline void convert_to_RGB (float &r, float &g, float &b, const float Y, const float I, const float Q); inline void convert_to_cielab_row (float* ar, float* ag, float* ab, float* oL, float* oa, float* ob); inline void interpolate_row_g (float* agh, float* agv, int i); diff --git a/rtengine/rawimagesource_i.h b/rtengine/rawimagesource_i.h index 17bc8368a..f5685b0ef 100644 --- a/rtengine/rawimagesource_i.h +++ b/rtengine/rawimagesource_i.h @@ -51,7 +51,7 @@ inline void RawImageSource::convert_row_to_RGB (float* r, float* g, float* b, co } } -inline void RawImageSource::convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q) +inline void RawImageSource::convert_to_RGB (float &r, float &g, float &b, const float Y, const float I, const float Q) { r = Y + 0.956f * I + 0.621f * Q; g = Y - 0.272f * I - 0.647f * Q;