24% speedup for raw false colour suppression

This commit is contained in:
heckflosse 2016-03-12 23:32:25 +01:00
parent 4fdcfbd8df
commit a55167bde8
4 changed files with 134 additions and 59 deletions

View File

@ -22,7 +22,6 @@
#include "rawimagesource.h" #include "rawimagesource.h"
#include "rawimagesource_i.h" #include "rawimagesource_i.h"
#include "jaggedarray.h" #include "jaggedarray.h"
#include "median.h"
#include "rawimage.h" #include "rawimage.h"
#include "mytime.h" #include "mytime.h"
#include "iccmatrices.h" #include "iccmatrices.h"

View File

@ -18,17 +18,8 @@
*/ */
#include "rt_math.h" #include "rt_math.h"
#define SORT3(a1,a2,a3,b1,b2,b3) \ // middle 4 of 6 elements,
{ \ #define MIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
b2 = min(a1,a2);\
b1 = min(b2,a3);\
b3 = max(a1,a2);\
b2 = max(b2, min(b3,a3));\
b3 = max(b3,a3);\
}
#define NETWORKSORT4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
{\ {\
d1 = min(s1,s2);\ d1 = min(s1,s2);\
d2 = max(s1,s2);\ d2 = max(s1,s2);\
@ -44,23 +35,32 @@ d5 = max(s3,d5);\
d3 = temp;\ d3 = temp;\
temp = min(d3,d4);\ temp = min(d3,d4);\
d4 = max(d3,d4);\ d4 = max(d3,d4);\
d3 = temp;\ d3 = max(d0,temp);\
d3 = max(d0,d3);\
temp = min(d1,d4);\
d4 = max(d1,d4);\
d1 = temp;\
d2 = min(d2,d5);\ d2 = min(d2,d5);\
temp = min(d2,d4);\
d4 = max(d2,d4);\
d2 = temp;\
temp = min(d1,d3);\
d3 = max(d1,d3);\
d1 = temp;\
temp = min(d2,d3);\
d3 = max(d2,d3);\
d2 = temp;\
} }
// Middle 4 of 6 elements, built from vminf/vmaxf (vector variant of the selection above).
// Inputs : s0,s1,s2 form the first triple, s3,s4,s5 the second triple.
// Outputs: d1..d4 receive the four middle values of the six inputs, in no particular order:
//            d1 = median of the first triple,   d4 = median of the second triple,
//            d3 = larger of the two triple minima, d2 = smaller of the two triple maxima.
//          d0 is left holding the minimum of the first triple and d5 the maximum of the
//          second triple; the overall minimum and maximum of the six inputs are not kept.
// temp   : caller-supplied scratch variable, overwritten.
// The arguments are expanded several times and destinations are written before all sources
// have been read, so pass plain variables and do not let any d* alias any s*.
// NOTE(review): presumably vminf/vmaxf operate lane-wise on vfloat — confirm in the SSE helper header.
#define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
{\
d1 = vminf(s1,s2);\
d2 = vmaxf(s1,s2);\
d0 = vminf(s0,d2);\
d2 = vmaxf(s0,d2);\
temp = vminf(d0,d1);\
d1 = vmaxf(d0,d1);\
d0 = temp;\
d4 = vminf(s4,s5);\
d5 = vmaxf(s4,s5);\
temp = vminf(s3,d5);\
d5 = vmaxf(s3,d5);\
d3 = temp;\
temp = vminf(d3,d4);\
d4 = vmaxf(d3,d4);\
d3 = vmaxf(d0,temp);\
d2 = vminf(d2,d5);\
}
#define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \ #define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
{\ {\
t0 = min(s0,s5);\ t0 = min(s0,s5);\
@ -77,13 +77,36 @@ t5 = max(t3,t5);\
t3 = median;\ t3 = median;\
median = min(t2,t6);\ median = min(t2,t6);\
t6 = max(t2,t6);\ t6 = max(t2,t6);\
t2 = median;\ t3 = max(median,t3);\
t3 = max(t2,t3);\
t3 = min(t3,t6);\ t3 = min(t3,t6);\
t4 = min(t4,t5);\ t4 = min(t4,t5);\
median = min(t1,t4);\ median = min(t1,t4);\
t4 = max(t1,t4);\ t4 = max(t1,t4);\
t1 = median;\ t3 = max(median,t3);\
t3 = max(t1,t3);\
median = min(t3,t4);\ median = min(t3,t4);\
} }
// Min/max selection network over seven inputs, built from vminf/vmaxf; intended to leave
// the median of s0..s6 in `median` (same sequence of steps as the scalar MEDIAN7, with
// min/max replaced by vminf/vmaxf).
// Inputs : s0..s6, read only.
// Scratch: t0..t6 are caller-supplied temporaries; every one of them is overwritten.
// Output : `median` is used as an intermediate during the network and holds the result
//          after the final vminf(t3,t4).
// The arguments are expanded several times, so pass plain variables without side effects
// and do not let t* or `median` alias any s*.
// NOTE(review): presumably vminf/vmaxf operate lane-wise on vfloat — confirm in the SSE helper header.
#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
{\
t0 = vminf(s0,s5);\
t5 = vmaxf(s0,s5);\
t3 = vmaxf(t0,s3);\
t0 = vminf(t0,s3);\
t1 = vminf(s1,s6);\
t6 = vmaxf(s1,s6);\
t2 = vminf(s2,s4);\
t4 = vmaxf(s2,s4);\
t1 = vmaxf(t0,t1);\
median = vminf(t3,t5);\
t5 = vmaxf(t3,t5);\
t3 = median;\
median = vminf(t2,t6);\
t6 = vmaxf(t2,t6);\
t3 = vmaxf(median,t3);\
t3 = vminf(t3,t6);\
t4 = vminf(t4,t5);\
median = vminf(t1,t4);\
t4 = vmaxf(t1,t4);\
t3 = vmaxf(median,t3);\
median = vminf(t3,t4);\
}

View File

@ -33,6 +33,8 @@
#include "dcp.h" #include "dcp.h"
#include "rt_math.h" #include "rt_math.h"
#include "improcfun.h" #include "improcfun.h"
#define BENCHMARK
#include "StopWatch.h"
#ifdef _OPENMP #ifdef _OPENMP
#include <omp.h> #include <omp.h>
#endif #endif
@ -3387,20 +3389,24 @@ int RawImageSource::defTransform (int tran)
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Thread called part // Thread called part
void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to) void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to)
{ {
int W = im->width; int W = im->width;
array2D<float> rbconv_Y (W, 3);
array2D<float> rbconv_I (W, 3);
array2D<float> rbconv_Q (W, 3);
array2D<float> rbout_I (W, 3);
array2D<float> rbout_Q (W, 3);
float row_I[W]; float row_I[W];
float row_Q[W]; float row_Q[W];
#ifdef __SSE2__
vfloat buffer[12];
vfloat* pre1 = &buffer[0];
vfloat* pre2 = &buffer[3];
vfloat* post1 = &buffer[6];
vfloat* post2 = &buffer[9];
vfloat middle[6];
#else
float buffer[12]; float buffer[12];
float* pre1 = &buffer[0]; float* pre1 = &buffer[0];
float* pre2 = &buffer[3]; float* pre2 = &buffer[3];
@ -3408,6 +3414,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
float* post2 = &buffer[9]; float* post2 = &buffer[9];
float middle[6]; float middle[6];
#endif
int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0; int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0;
@ -3427,23 +3434,56 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W); convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W);
#ifdef __SSE2__
// Prime the sliding column buffers for the SSE path: pre1 holds column 0 and pre2 holds
// column 1 of the previous (px), current (cx) and next (nx) rows. Each vfloat packs the
// I value in lane 0 and the Q value in lane 1, so both channels are filtered in one pass;
// lanes 2 and 3 are zero padding and are never read back.
pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0);
pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0);
pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0);
// Column 1 must go into pre2[0..2] (as in the scalar branch). Storing it in pre1[1]/pre1[2]
// would overwrite the column-0 values above and leave pre2[1]/pre2[2] unset for the first
// VMIDDLE4OF6 call.
pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0);
pre2[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0);
pre2[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0);
vfloat temp[7];
// fill first element in rbout_I and rbout_Q
rbout_I[cx][0] = rbconv_I[cx][0];
rbout_Q[cx][0] = rbconv_Q[cx][0];
// median I channel
for (int j = 1; j < W - 2; j += 2) {
post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0), post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0), post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0);
VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
vfloat medianval;
VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
rbout_I[cx][j] = medianval[0];
rbout_Q[cx][j] = medianval[1];
post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0), post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0), post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0);
VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
rbout_I[cx][j + 1] = medianval[0];
rbout_Q[cx][j + 1] = medianval[1];
std::swap(pre1, post1);
std::swap(pre2, post2);
}
// fill last elements in rbout_I and rbout_Q
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
#else
pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0]; pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0];
pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1]; pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1];
float temp[7]; float temp[7];
// fill first element in rbout_I // fill first element in rbout_I
rbout_I[cx][0] = rbconv_I[cx][0]; rbout_I[cx][0] = rbconv_I[cx][0];
// median I channel // median I channel
for (int j = 1; j < W - 2; j += 2) { for (int j = 1; j < W - 2; j += 2) {
post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1]; post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1];
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]); MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]);
post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2]; post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2];
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]); MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]);
std::swap(pre1,post1); std::swap(pre1, post1);
std::swap(pre2,post2); std::swap(pre2, post2);
} }
// fill last elements in rbout_I // fill last elements in rbout_I
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1]; rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2]; rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
@ -3453,19 +3493,22 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
// fill first element in rbout_Q // fill first element in rbout_Q
rbout_Q[cx][0] = rbconv_Q[cx][0]; rbout_Q[cx][0] = rbconv_Q[cx][0];
// median Q channel // median Q channel
for (int j = 1; j < W - 2; j += 2) { for (int j = 1; j < W - 2; j += 2) {
post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1]; post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1];
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]); MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]);
post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2]; post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2];
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]); MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]);
std::swap(pre1,post1); std::swap(pre1, post1);
std::swap(pre2,post2); std::swap(pre2, post2);
} }
// fill last elements in rbout_Q // fill last elements in rbout_Q
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1]; rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2]; rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
#endif
// blur i-1th row // blur i-1th row
if (i > row_from) { if (i > row_from) {
@ -3504,29 +3547,39 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
// correction_YIQ_LQ // correction_YIQ_LQ
void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps) void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps)
{ {
BENCHFUN
if (im->height < 4 || steps < 1) { if (im->height < 4 || steps < 1) {
return; return;
} }
for (int t = 0; t < steps; t++) {
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel #pragma omp parallel
{ {
multi_array2D<float, 5> buffer (W, 3);
int tid = omp_get_thread_num(); int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads(); int nthreads = omp_get_num_threads();
int blk = (im->height - 2) / nthreads; int blk = (im->height - 2) / nthreads;
if (tid < nthreads - 1) for (int t = 0; t < steps; t++) {
{
processFalseColorCorrectionThread (im, 1 + tid * blk, 1 + (tid + 1)*blk); if (tid < nthreads - 1) {
} else processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, 1 + (tid + 1)*blk);
{ processFalseColorCorrectionThread (im, 1 + tid * blk, im->height - 1); } } else {
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, im->height - 1);
}
#pragma omp barrier
}
} }
#else #else
processFalseColorCorrectionThread (im, 1 , im->height - 1); multi_array2D<float, 5> buffer (W, 3);
#endif
for (int t = 0; t < steps; t++) {
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 , im->height - 1);
} }
#endif
} }
// Some camera input profiles need gamma preprocessing // Some camera input profiles need gamma preprocessing

View File

@ -95,7 +95,7 @@ protected:
void hphd_vertical (float** hpmap, int col_from, int col_to); void hphd_vertical (float** hpmap, int col_from, int col_to);
void hphd_horizontal (float** hpmap, int row_from, int row_to); void hphd_horizontal (float** hpmap, int row_from, int row_to);
void hphd_green (float** hpmap); void hphd_green (float** hpmap);
void processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to); void processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to);
void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax); void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax);
int defTransform (int tran); int defTransform (int tran);
void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw); void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw);