24% speedup for raw false colour suppression

This commit is contained in:
heckflosse 2016-03-12 23:32:25 +01:00
parent 4fdcfbd8df
commit a55167bde8
4 changed files with 134 additions and 59 deletions

View File

@ -22,7 +22,6 @@
#include "rawimagesource.h"
#include "rawimagesource_i.h"
#include "jaggedarray.h"
#include "median.h"
#include "rawimage.h"
#include "mytime.h"
#include "iccmatrices.h"

View File

@ -18,17 +18,8 @@
*/
#include "rt_math.h"
// Sorts the three values a1, a2, a3 into ascending order using only min/max:
// b1 receives the smallest, b2 the middle and b3 the largest value.
// b2 and b3 are written as intermediates before every input has been read,
// so the outputs b1..b3 must not alias the inputs a1..a3.
#define SORT3(a1,a2,a3,b1,b2,b3) \
{ \
b2 = min(a1,a2);\
b1 = min(b2,a3);\
b3 = max(a1,a2);\
b2 = max(b2, min(b3,a3));\
b3 = max(b3,a3);\
}
#define NETWORKSORT4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
// middle 4 of 6 elements,
#define MIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
{\
d1 = min(s1,s2);\
d2 = max(s1,s2);\
@ -44,23 +35,32 @@ d5 = max(s3,d5);\
d3 = temp;\
temp = min(d3,d4);\
d4 = max(d3,d4);\
d3 = temp;\
d3 = max(d0,d3);\
temp = min(d1,d4);\
d4 = max(d1,d4);\
d1 = temp;\
d3 = max(d0,temp);\
d2 = min(d2,d5);\
temp = min(d2,d4);\
d4 = max(d2,d4);\
d2 = temp;\
temp = min(d1,d3);\
d3 = max(d1,d3);\
d1 = temp;\
temp = min(d2,d3);\
d3 = max(d2,d3);\
d2 = temp;\
}
// Middle 4 of 6 elements, written with vminf/vmaxf instead of min/max.
// s0..s2 are ordered into d0 <= d1 <= d2 and s3..s5 into temp <= d4 <= d5;
// then the smaller of the two triple minima and the larger of the two triple
// maxima are discarded (d3 = larger minimum, d2 = smaller maximum).
// The four remaining values end up in d1, d2, d3 and d4; no ordering between
// them is established. d0, d5 and temp are scratch and are overwritten.
#define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
{\
d1 = vminf(s1,s2);\
d2 = vmaxf(s1,s2);\
d0 = vminf(s0,d2);\
d2 = vmaxf(s0,d2);\
temp = vminf(d0,d1);\
d1 = vmaxf(d0,d1);\
d0 = temp;\
d4 = vminf(s4,s5);\
d5 = vmaxf(s4,s5);\
temp = vminf(s3,d5);\
d5 = vmaxf(s3,d5);\
d3 = temp;\
temp = vminf(d3,d4);\
d4 = vmaxf(d3,d4);\
d3 = vmaxf(d0,temp);\
d2 = vminf(d2,d5);\
}
#define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
{\
t0 = min(s0,s5);\
@ -77,13 +77,36 @@ t5 = max(t3,t5);\
t3 = median;\
median = min(t2,t6);\
t6 = max(t2,t6);\
t2 = median;\
t3 = max(t2,t3);\
t3 = max(median,t3);\
t3 = min(t3,t6);\
t4 = min(t4,t5);\
median = min(t1,t4);\
t4 = max(t1,t4);\
t1 = median;\
t3 = max(t1,t3);\
t3 = max(median,t3);\
median = min(t3,t4);\
}
// vminf/vmaxf selection network over the seven inputs s0..s6; the result is
// stored in `median` (the median of the seven values, going by the macro name).
// t0..t6 are caller-supplied scratch variables and are overwritten.
// `median` is itself used as a temporary while the network is evaluated, so it
// must not alias any of the inputs or scratch variables.
#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
{\
t0 = vminf(s0,s5);\
t5 = vmaxf(s0,s5);\
t3 = vmaxf(t0,s3);\
t0 = vminf(t0,s3);\
t1 = vminf(s1,s6);\
t6 = vmaxf(s1,s6);\
t2 = vminf(s2,s4);\
t4 = vmaxf(s2,s4);\
t1 = vmaxf(t0,t1);\
median = vminf(t3,t5);\
t5 = vmaxf(t3,t5);\
t3 = median;\
median = vminf(t2,t6);\
t6 = vmaxf(t2,t6);\
t3 = vmaxf(median,t3);\
t3 = vminf(t3,t6);\
t4 = vminf(t4,t5);\
median = vminf(t1,t4);\
t4 = vmaxf(t1,t4);\
t3 = vmaxf(median,t3);\
median = vminf(t3,t4);\
}

View File

@ -33,6 +33,8 @@
#include "dcp.h"
#include "rt_math.h"
#include "improcfun.h"
#define BENCHMARK
#include "StopWatch.h"
#ifdef _OPENMP
#include <omp.h>
#endif
@ -3387,20 +3389,24 @@ int RawImageSource::defTransform (int tran)
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Thread called part
void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to)
void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to)
{
int W = im->width;
array2D<float> rbconv_Y (W, 3);
array2D<float> rbconv_I (W, 3);
array2D<float> rbconv_Q (W, 3);
array2D<float> rbout_I (W, 3);
array2D<float> rbout_Q (W, 3);
float row_I[W];
float row_Q[W];
#ifdef __SSE2__
vfloat buffer[12];
vfloat* pre1 = &buffer[0];
vfloat* pre2 = &buffer[3];
vfloat* post1 = &buffer[6];
vfloat* post2 = &buffer[9];
vfloat middle[6];
#else
float buffer[12];
float* pre1 = &buffer[0];
float* pre2 = &buffer[3];
@ -3408,6 +3414,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
float* post2 = &buffer[9];
float middle[6];
#endif
int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0;
@ -3427,23 +3434,56 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W);
#ifdef __SSE2__
// Seed the sliding window. Every vfloat packs I in lane 0 and Q in lane 1
// (lanes 2 and 3 are zero), so one pass of the selection networks filters
// both chroma channels at once.
// pre1 = column 0, pre2 = column 1, each holding rows px / cx / nx.
pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0);
pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0);
pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0);
// All three rows of column 1 belong in pre2, exactly as in the scalar branch.
// Storing the cx/nx entries in pre1 instead would overwrite the column 0 seed
// and leave pre2[1] and pre2[2] uninitialised for the first VMIDDLE4OF6.
pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0);
pre2[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0);
pre2[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0);
vfloat temp[7];
// fill first element in rbout_I and rbout_Q
rbout_I[cx][0] = rbconv_I[cx][0];
rbout_Q[cx][0] = rbconv_Q[cx][0];
// median of the I and Q channels, two output columns per iteration
for (int j = 1; j < W - 2; j += 2) {
    // load column j + 1
    post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0);
    post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0);
    post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0);
    // middle 4 of the 6 values in columns j (pre2) and j + 1 (post1);
    // this partial result is shared by both windows evaluated below
    VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
    vfloat medianval;
    // window around column j: adds column j - 1 (pre1)
    VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
    rbout_I[cx][j] = medianval[0];
    rbout_Q[cx][j] = medianval[1];
    // load column j + 2
    post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0);
    post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0);
    post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0);
    // window around column j + 1: adds column j + 2 (post2)
    VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
    rbout_I[cx][j + 1] = medianval[0];
    rbout_Q[cx][j + 1] = medianval[1];
    // slide the window two columns to the right:
    // pre1 becomes column j + 1 and pre2 becomes column j + 2
    std::swap(pre1, post1);
    std::swap(pre2, post2);
}
// fill last elements in rbout_I and rbout_Q
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
#else
pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0];
pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1];
float temp[7];
// fill first element in rbout_I
rbout_I[cx][0] = rbconv_I[cx][0];
// median I channel
for (int j = 1; j < W - 2; j += 2) {
post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1];
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]);
post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2];
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]);
std::swap(pre1,post1);
std::swap(pre2,post2);
std::swap(pre1, post1);
std::swap(pre2, post2);
}
// fill last elements in rbout_I
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
@ -3453,19 +3493,22 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
// fill first element in rbout_Q
rbout_Q[cx][0] = rbconv_Q[cx][0];
// median Q channel
for (int j = 1; j < W - 2; j += 2) {
post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1];
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]);
post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2];
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]);
std::swap(pre1,post1);
std::swap(pre2,post2);
std::swap(pre1, post1);
std::swap(pre2, post2);
}
// fill last elements in rbout_Q
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
#endif
// blur i-1th row
if (i > row_from) {
@ -3504,29 +3547,39 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
// correction_YIQ_LQ
// Applies processFalseColorCorrectionThread to `im`, repeated `steps` times.
// Returns without touching the image when it has fewer than 4 rows or when
// steps < 1. Every worker call covers rows from 1 up to im->height - 1.
void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps)
{
BENCHFUN
if (im->height < 4 || steps < 1) {
return;
}
for (int t = 0; t < steps; t++) {
#ifdef _OPENMP
#pragma omp parallel
{
// Scratch rows handed to the worker; declared inside the parallel region.
// NOTE(review): W is not declared in this function (presumably the
// RawImageSource member), while the worker takes its row width from
// im->width -- confirm W >= im->width.
multi_array2D<float, 5> buffer (W, 3);
int tid = omp_get_thread_num();
int nthreads = omp_get_num_threads();
// Rows are split into chunks of blk rows per thread; the last thread
// runs up to im->height - 1 and so also takes the division remainder.
int blk = (im->height - 2) / nthreads;
if (tid < nthreads - 1)
{
processFalseColorCorrectionThread (im, 1 + tid * blk, 1 + (tid + 1)*blk);
} else
{ processFalseColorCorrectionThread (im, 1 + tid * blk, im->height - 1); }
for (int t = 0; t < steps; t++) {
if (tid < nthreads - 1) {
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, 1 + (tid + 1)*blk);
} else {
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, im->height - 1);
}
// every thread finishes step t before any thread starts step t + 1
#pragma omp barrier
}
}
#else
processFalseColorCorrectionThread (im, 1 , im->height - 1);
#endif
// NOTE(review): same W vs. im->width question as for the OpenMP buffer.
multi_array2D<float, 5> buffer (W, 3);
for (int t = 0; t < steps; t++) {
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 , im->height - 1);
}
#endif
}
// Some camera input profiles need gamma preprocessing

View File

@ -95,7 +95,7 @@ protected:
void hphd_vertical (float** hpmap, int col_from, int col_to);
void hphd_horizontal (float** hpmap, int row_from, int row_to);
void hphd_green (float** hpmap);
void processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to);
void processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to);
void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax);
int defTransform (int tran);
void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw);