24% speedup for raw false colour suppression
This commit is contained in:
parent
4fdcfbd8df
commit
a55167bde8
@ -22,7 +22,6 @@
|
||||
#include "rawimagesource.h"
|
||||
#include "rawimagesource_i.h"
|
||||
#include "jaggedarray.h"
|
||||
#include "median.h"
|
||||
#include "rawimage.h"
|
||||
#include "mytime.h"
|
||||
#include "iccmatrices.h"
|
||||
|
@ -18,17 +18,8 @@
|
||||
*/
|
||||
#include "rt_math.h"
|
||||
|
||||
// SORT3: stores the three inputs a1..a3 in ascending order in b1 <= b2 <= b3,
// using only min/max (no branches in the macro itself).
//   b1 receives the minimum, b2 the median, b3 the maximum.
// b2 and b3 are used as scratch before they receive their final values, and
// a1/a2/a3 are read again after outputs have been written, so the output
// arguments must be distinct lvalues from the inputs.
#define SORT3(a1,a2,a3,b1,b2,b3) \
|
||||
{ \
|
||||
b2 = min(a1,a2);\
|
||||
b1 = min(b2,a3);\
|
||||
b3 = max(a1,a2);\
|
||||
b2 = max(b2, min(b3,a3));\
|
||||
b3 = max(b3,a3);\
|
||||
}
|
||||
|
||||
|
||||
#define NETWORKSORT4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
|
||||
// Middle 4 of 6 elements: the smallest and the largest value are discarded.
|
||||
#define MIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
|
||||
{\
|
||||
d1 = min(s1,s2);\
|
||||
d2 = max(s1,s2);\
|
||||
@ -44,23 +35,32 @@ d5 = max(s3,d5);\
|
||||
d3 = temp;\
|
||||
temp = min(d3,d4);\
|
||||
d4 = max(d3,d4);\
|
||||
d3 = temp;\
|
||||
d3 = max(d0,d3);\
|
||||
temp = min(d1,d4);\
|
||||
d4 = max(d1,d4);\
|
||||
d1 = temp;\
|
||||
d3 = max(d0,temp);\
|
||||
d2 = min(d2,d5);\
|
||||
temp = min(d2,d4);\
|
||||
d4 = max(d2,d4);\
|
||||
d2 = temp;\
|
||||
temp = min(d1,d3);\
|
||||
d3 = max(d1,d3);\
|
||||
d1 = temp;\
|
||||
temp = min(d2,d3);\
|
||||
d3 = max(d2,d3);\
|
||||
d2 = temp;\
|
||||
}
|
||||
|
||||
// VMIDDLE4OF6: middle 4 of 6 elements, built from vminf/vmaxf only.
//   s0..s5 : the six inputs (only read).
//   d0..d5 : outputs/scratch; temp : scratch.
// The two triples (s0,s1,s2) and (s3,s4,s5) are each ordered first. Then
// d3 = larger of the two triple minima and d2 = smaller of the two triple maxima,
// which removes the overall minimum and the overall maximum of the six values.
// On exit d1, d2, d3, d4 hold the four remaining values (not sorted among
// themselves); d0 and d5 only hold intermediate values.
|
||||
#define VMIDDLE4OF6(s0,s1,s2,s3,s4,s5,d0,d1,d2,d3,d4,d5,temp) \
|
||||
{\
|
||||
d1 = vminf(s1,s2);\
|
||||
d2 = vmaxf(s1,s2);\
|
||||
d0 = vminf(s0,d2);\
|
||||
d2 = vmaxf(s0,d2);\
|
||||
temp = vminf(d0,d1);\
|
||||
d1 = vmaxf(d0,d1);\
|
||||
d0 = temp;\
|
||||
d4 = vminf(s4,s5);\
|
||||
d5 = vmaxf(s4,s5);\
|
||||
temp = vminf(s3,d5);\
|
||||
d5 = vmaxf(s3,d5);\
|
||||
d3 = temp;\
|
||||
temp = vminf(d3,d4);\
|
||||
d4 = vmaxf(d3,d4);\
|
||||
d3 = vmaxf(d0,temp);\
|
||||
d2 = vminf(d2,d5);\
|
||||
}
|
||||
|
||||
|
||||
#define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
|
||||
{\
|
||||
t0 = min(s0,s5);\
|
||||
@ -77,13 +77,36 @@ t5 = max(t3,t5);\
|
||||
t3 = median;\
|
||||
median = min(t2,t6);\
|
||||
t6 = max(t2,t6);\
|
||||
t2 = median;\
|
||||
t3 = max(t2,t3);\
|
||||
t3 = max(median,t3);\
|
||||
t3 = min(t3,t6);\
|
||||
t4 = min(t4,t5);\
|
||||
median = min(t1,t4);\
|
||||
t4 = max(t1,t4);\
|
||||
t1 = median;\
|
||||
t3 = max(t1,t3);\
|
||||
t3 = max(median,t3);\
|
||||
median = min(t3,t4);\
|
||||
}
|
||||
|
||||
// VMEDIAN7: min/max selection network over seven inputs, built from vminf/vmaxf.
//   s0..s6 : the seven inputs (only read).
//   t0..t6 : scratch, overwritten.
//   median : receives the result; it is also used as a temporary along the way,
//            so it must not alias any input or scratch argument.
// The network does not fully sort: values that can no longer be the median are
// dropped as soon as possible (e.g. t1 = vmaxf(t0,t1) discards the smaller of two
// partial minima), and the last vminf leaves the median of s0..s6 in `median`.
#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \
|
||||
{\
|
||||
t0 = vminf(s0,s5);\
|
||||
t5 = vmaxf(s0,s5);\
|
||||
t3 = vmaxf(t0,s3);\
|
||||
t0 = vminf(t0,s3);\
|
||||
t1 = vminf(s1,s6);\
|
||||
t6 = vmaxf(s1,s6);\
|
||||
t2 = vminf(s2,s4);\
|
||||
t4 = vmaxf(s2,s4);\
|
||||
t1 = vmaxf(t0,t1);\
|
||||
median = vminf(t3,t5);\
|
||||
t5 = vmaxf(t3,t5);\
|
||||
t3 = median;\
|
||||
median = vminf(t2,t6);\
|
||||
t6 = vmaxf(t2,t6);\
|
||||
t3 = vmaxf(median,t3);\
|
||||
t3 = vminf(t3,t6);\
|
||||
t4 = vminf(t4,t5);\
|
||||
median = vminf(t1,t4);\
|
||||
t4 = vmaxf(t1,t4);\
|
||||
t3 = vmaxf(median,t3);\
|
||||
median = vminf(t3,t4);\
|
||||
}
|
||||
|
@ -33,6 +33,8 @@
|
||||
#include "dcp.h"
|
||||
#include "rt_math.h"
|
||||
#include "improcfun.h"
|
||||
#define BENCHMARK
|
||||
#include "StopWatch.h"
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
@ -3387,20 +3389,24 @@ int RawImageSource::defTransform (int tran)
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
// Thread called part
|
||||
void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to)
|
||||
void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to)
|
||||
{
|
||||
|
||||
int W = im->width;
|
||||
|
||||
array2D<float> rbconv_Y (W, 3);
|
||||
array2D<float> rbconv_I (W, 3);
|
||||
array2D<float> rbconv_Q (W, 3);
|
||||
array2D<float> rbout_I (W, 3);
|
||||
array2D<float> rbout_Q (W, 3);
|
||||
|
||||
float row_I[W];
|
||||
float row_Q[W];
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat buffer[12];
|
||||
vfloat* pre1 = &buffer[0];
|
||||
vfloat* pre2 = &buffer[3];
|
||||
vfloat* post1 = &buffer[6];
|
||||
vfloat* post2 = &buffer[9];
|
||||
|
||||
vfloat middle[6];
|
||||
|
||||
#else
|
||||
float buffer[12];
|
||||
float* pre1 = &buffer[0];
|
||||
float* pre2 = &buffer[3];
|
||||
@ -3408,6 +3414,7 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
|
||||
float* post2 = &buffer[9];
|
||||
|
||||
float middle[6];
|
||||
#endif
|
||||
|
||||
int px = (row_from - 1) % 3, cx = row_from % 3, nx = 0;
|
||||
|
||||
@ -3427,23 +3434,56 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
|
||||
|
||||
convert_row_to_YIQ (im->r(i + 1), im->g(i + 1), im->b(i + 1), rbconv_Y[nx], rbconv_I[nx], rbconv_Q[nx], W);
|
||||
|
||||
#ifdef __SSE2__
|
||||
pre1[0] = _mm_setr_ps(rbconv_I[px][0], rbconv_Q[px][0], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][0], rbconv_Q[cx][0], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][0], rbconv_Q[nx][0], 0, 0);
|
||||
pre2[0] = _mm_setr_ps(rbconv_I[px][1], rbconv_Q[px][1], 0, 0) , pre1[1] = _mm_setr_ps(rbconv_I[cx][1], rbconv_Q[cx][1], 0, 0), pre1[2] = _mm_setr_ps(rbconv_I[nx][1], rbconv_Q[nx][1], 0, 0);
|
||||
vfloat temp[7];
|
||||
|
||||
// fill first element in rbout_I and rbout_Q
|
||||
rbout_I[cx][0] = rbconv_I[cx][0];
|
||||
rbout_Q[cx][0] = rbconv_Q[cx][0];
|
||||
|
||||
// median I channel
|
||||
for (int j = 1; j < W - 2; j += 2) {
|
||||
post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0), post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0), post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0);
|
||||
VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
|
||||
vfloat medianval;
|
||||
VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
|
||||
rbout_I[cx][j] = medianval[0];
|
||||
rbout_Q[cx][j] = medianval[1];
|
||||
post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0), post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0), post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0);
|
||||
VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval);
|
||||
rbout_I[cx][j + 1] = medianval[0];
|
||||
rbout_Q[cx][j + 1] = medianval[1];
|
||||
std::swap(pre1, post1);
|
||||
std::swap(pre2, post2);
|
||||
}
|
||||
|
||||
// fill last elements in rbout_I and rbout_Q
|
||||
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
|
||||
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
|
||||
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
|
||||
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
|
||||
|
||||
#else
|
||||
pre1[0] = rbconv_I[px][0], pre1[1] = rbconv_I[cx][0], pre1[2] = rbconv_I[nx][0];
|
||||
pre2[0] = rbconv_I[px][1], pre2[1] = rbconv_I[cx][1], pre2[2] = rbconv_I[nx][1];
|
||||
|
||||
float temp[7];
|
||||
|
||||
// fill first element in rbout_I
|
||||
rbout_I[cx][0] = rbconv_I[cx][0];
|
||||
|
||||
// median I channel
|
||||
for (int j = 1; j < W - 2; j += 2) {
|
||||
post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1];
|
||||
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
|
||||
MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
|
||||
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]);
|
||||
post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2];
|
||||
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]);
|
||||
std::swap(pre1,post1);
|
||||
std::swap(pre2,post2);
|
||||
std::swap(pre1, post1);
|
||||
std::swap(pre2, post2);
|
||||
}
|
||||
|
||||
// fill last elements in rbout_I
|
||||
rbout_I[cx][W - 1] = rbconv_I[cx][W - 1];
|
||||
rbout_I[cx][W - 2] = rbconv_I[cx][W - 2];
|
||||
@ -3453,19 +3493,22 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
|
||||
|
||||
// fill first element in rbout_Q
|
||||
rbout_Q[cx][0] = rbconv_Q[cx][0];
|
||||
|
||||
// median Q channel
|
||||
for (int j = 1; j < W - 2; j += 2) {
|
||||
post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1];
|
||||
NETWORKSORT4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
|
||||
MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]);
|
||||
MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]);
|
||||
post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2];
|
||||
MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]);
|
||||
std::swap(pre1,post1);
|
||||
std::swap(pre2,post2);
|
||||
std::swap(pre1, post1);
|
||||
std::swap(pre2, post2);
|
||||
}
|
||||
|
||||
// fill last elements in rbout_Q
|
||||
rbout_Q[cx][W - 1] = rbconv_Q[cx][W - 1];
|
||||
rbout_Q[cx][W - 2] = rbconv_Q[cx][W - 2];
|
||||
#endif
|
||||
|
||||
// blur i-1th row
|
||||
if (i > row_from) {
|
||||
@ -3504,29 +3547,39 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, const i
|
||||
// correction_YIQ_LQ
|
||||
// Runs `steps` passes of false colour correction over `im`.
// Returns immediately when im->height < 4 or steps < 1.
// With OpenMP, each thread handles a block of (im->height - 2) / nthreads rows
// starting at row 1 + tid * blk; the last thread runs up to im->height - 1 and so
// also takes the rows left over by the integer division. Without OpenMP a single
// call covers rows 1 .. im->height - 1.
// In the variant that passes `buffer`, the five working arrays are created once
// per thread (or once overall without OpenMP) and reused for every pass, and the
// threads meet at `#pragma omp barrier` after each pass.
// NOTE(review): this listing appears to interleave the removed and the added lines
// of the hunk (two `for (int t ...)` loops, two `#pragma omp parallel` regions,
// calls with both the 3-argument and the 8-argument signature, surplus
// `#else`/`#endif`) - confirm against the real file before relying on it.
void RawImageSource::processFalseColorCorrection (Imagefloat* im, const int steps)
|
||||
{
|
||||
BENCHFUN
|
||||
|
||||
if (im->height < 4 || steps < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int t = 0; t < steps; t++) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int blk = (im->height - 2) / nthreads;
|
||||
#pragma omp parallel
|
||||
{
|
||||
multi_array2D<float, 5> buffer (W, 3);
|
||||
int tid = omp_get_thread_num();
|
||||
int nthreads = omp_get_num_threads();
|
||||
int blk = (im->height - 2) / nthreads;
|
||||
|
||||
if (tid < nthreads - 1)
|
||||
{
|
||||
processFalseColorCorrectionThread (im, 1 + tid * blk, 1 + (tid + 1)*blk);
|
||||
} else
|
||||
{ processFalseColorCorrectionThread (im, 1 + tid * blk, im->height - 1); }
|
||||
for (int t = 0; t < steps; t++) {
|
||||
|
||||
if (tid < nthreads - 1) {
|
||||
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, 1 + (tid + 1)*blk);
|
||||
} else {
|
||||
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 + tid * blk, im->height - 1);
|
||||
}
|
||||
|
||||
#pragma omp barrier
|
||||
}
|
||||
#else
|
||||
processFalseColorCorrectionThread (im, 1 , im->height - 1);
|
||||
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
multi_array2D<float, 5> buffer (W, 3);
|
||||
|
||||
for (int t = 0; t < steps; t++) {
|
||||
processFalseColorCorrectionThread (im, buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], 1 , im->height - 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Some camera input profiles need gamma preprocessing
|
||||
|
@ -95,7 +95,7 @@ protected:
|
||||
void hphd_vertical (float** hpmap, int col_from, int col_to);
|
||||
void hphd_horizontal (float** hpmap, int row_from, int row_to);
|
||||
void hphd_green (float** hpmap);
|
||||
void processFalseColorCorrectionThread (Imagefloat* im, const int row_from, const int row_to);
|
||||
void processFalseColorCorrectionThread (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to);
|
||||
void hlRecovery (std::string method, float* red, float* green, float* blue, int i, int sx1, int width, int skip, const RAWParams &raw, float* hlmax);
|
||||
int defTransform (int tran);
|
||||
void transformRect (PreviewProps pp, int tran, int &sx1, int &sy1, int &width, int &height, int &fw);
|
||||
|
Loading…
x
Reference in New Issue
Block a user