small speedup for denoise
This commit is contained in:
parent
a09e319216
commit
25625c6842
@ -42,6 +42,7 @@
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#define BENCHMARK
|
||||
#include "StopWatch.h"
|
||||
|
||||
#define TS 64 // Tile size
|
||||
@ -839,6 +840,12 @@ BENCHFUN
|
||||
{static_cast<float>(wprof[2][0]), static_cast<float>(wprof[2][1]), static_cast<float>(wprof[2][2])}
|
||||
};
|
||||
|
||||
const float wpfast[3][3] = {
|
||||
{static_cast<float>(wprof[0][0]) / Color::D50x, static_cast<float>(wprof[0][1]) / Color::D50x, static_cast<float>(wprof[0][2]) / Color::D50x},
|
||||
{static_cast<float>(wprof[1][0]), static_cast<float>(wprof[1][1]), static_cast<float>(wprof[1][2])},
|
||||
{static_cast<float>(wprof[2][0]) / Color::D50z, static_cast<float>(wprof[2][1]) / Color::D50z, static_cast<float>(wprof[2][2]) / Color::D50z}
|
||||
};
|
||||
|
||||
// begin tile processing of image
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel num_threads(numthreads) if (numthreads>1)
|
||||
@ -925,51 +932,38 @@ BENCHFUN
|
||||
if (!denoiseMethodRgb) { //lab mode
|
||||
//modification Jacques feb 2013 and july 2014
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||
#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||
#endif
|
||||
|
||||
for (int i = tiletop; i < tilebottom; ++i) {
|
||||
int i1 = i - tiletop;
|
||||
const int i1 = i - tiletop;
|
||||
|
||||
for (int j = tileleft; j < tileright; ++j) {
|
||||
int j1 = j - tileleft;
|
||||
float R_ = gain * src->r(i, j);
|
||||
float G_ = gain * src->g(i, j);
|
||||
float B_ = gain * src->b(i, j);
|
||||
const int j1 = j - tileleft;
|
||||
|
||||
R_ = Color::denoiseIGammaTab[R_];
|
||||
G_ = Color::denoiseIGammaTab[G_];
|
||||
B_ = Color::denoiseIGammaTab[B_];
|
||||
const float R_ = Color::denoiseIGammaTab[gain * src->r(i, j)];
|
||||
const float G_ = Color::denoiseIGammaTab[gain * src->g(i, j)];
|
||||
const float B_ = Color::denoiseIGammaTab[gain * src->b(i, j)];
|
||||
|
||||
//apply gamma noise standard (slider)
|
||||
R_ = R_ < 65535.f ? gamcurve[R_] : (Color::gammanf(R_ / 65535.f, gam) * 32768.f);
|
||||
G_ = G_ < 65535.f ? gamcurve[G_] : (Color::gammanf(G_ / 65535.f, gam) * 32768.f);
|
||||
B_ = B_ < 65535.f ? gamcurve[B_] : (Color::gammanf(B_ / 65535.f, gam) * 32768.f);
|
||||
|
||||
//true conversion xyz=>Lab
|
||||
float X, Y, Z;
|
||||
Color::rgbxyz(R_, G_, B_, X, Y, Z, wp);
|
||||
|
||||
//convert to Lab
|
||||
float L, a, b;
|
||||
Color::XYZ2Lab(X, Y, Z, L, a, b);
|
||||
|
||||
labdn->L[i1][j1] = L;
|
||||
labdn->a[i1][j1] = a;
|
||||
labdn->b[i1][j1] = b;
|
||||
labdn->L[i1][j1] = R_ < 65535.f ? gamcurve[R_] : Color::gammanf(R_ / 65535.f, gam) * 32768.f;
|
||||
labdn->a[i1][j1] = G_ < 65535.f ? gamcurve[G_] : Color::gammanf(G_ / 65535.f, gam) * 32768.f;
|
||||
labdn->b[i1][j1] = B_ < 65535.f ? gamcurve[B_] : Color::gammanf(B_ / 65535.f, gam) * 32768.f;
|
||||
|
||||
if (((i1 | j1) & 1) == 0) {
|
||||
if (numTries == 1) {
|
||||
noisevarlum[(i1 >> 1)*width2 + (j1 >> 1)] = useNoiseLCurve ? lumcalc[i >> 1][j >> 1] : noisevarL;
|
||||
noisevarchrom[(i1 >> 1)*width2 + (j1 >> 1)] = useNoiseCCurve ? maxNoiseVarab * ccalc[i >> 1][j >> 1] : 1.f;
|
||||
noisevarlum[(i1 >> 1) * width2 + (j1 >> 1)] = useNoiseLCurve ? lumcalc[i >> 1][j >> 1] : noisevarL;
|
||||
noisevarchrom[(i1 >> 1) * width2 + (j1 >> 1)] = useNoiseCCurve ? maxNoiseVarab * ccalc[i >> 1][j >> 1] : 1.f;
|
||||
} else {
|
||||
noisevarlum[(i1 >> 1)*width2 + (j1 >> 1)] = lumcalc[i >> 1][j >> 1];
|
||||
noisevarchrom[(i1 >> 1)*width2 + (j1 >> 1)] = ccalc[i >> 1][j >> 1];
|
||||
noisevarlum[(i1 >> 1) * width2 + (j1 >> 1)] = lumcalc[i >> 1][j >> 1];
|
||||
noisevarchrom[(i1 >> 1) * width2 + (j1 >> 1)] = ccalc[i >> 1][j >> 1];
|
||||
}
|
||||
}
|
||||
|
||||
//end chroma
|
||||
}
|
||||
//true conversion xyz=>Lab
|
||||
Color::RGB2Lab(labdn->L[i1], labdn->a[i1], labdn->b[i1], labdn->L[i1], labdn->a[i1], labdn->b[i1], wpfast, width);
|
||||
}
|
||||
} else {//RGB mode
|
||||
#ifdef _OPENMP
|
||||
@ -1605,27 +1599,13 @@ BENCHFUN
|
||||
|
||||
for (int i = tiletop; i < tilebottom; ++i) {
|
||||
int i1 = i - tiletop;
|
||||
|
||||
//true conversion Lab==>xyz
|
||||
Color::Lab2RGBLimit(labdn->L[i1], labdn->a[i1], labdn->b[i1], labdn->L[i1], labdn->a[i1], labdn->b[i1], wip, 9000000.f, 1.f + qhighFactor * realred, 1.f + qhighFactor * realblue, width);
|
||||
for (int j = tileleft; j < tileright; ++j) {
|
||||
int j1 = j - tileleft;
|
||||
//modification Jacques feb 2013
|
||||
//true conversion Lab==>xyz
|
||||
float L = labdn->L[i1][j1];
|
||||
float a = labdn->a[i1][j1];
|
||||
float b = labdn->b[i1][j1];
|
||||
float c_h = SQR(a) + SQR(b);
|
||||
|
||||
if (c_h > 9000000.f) {
|
||||
a *= 1.f + qhighFactor * realred;
|
||||
b *= 1.f + qhighFactor * realblue;
|
||||
}
|
||||
|
||||
//convert XYZ
|
||||
float X, Y, Z;
|
||||
Color::Lab2XYZ(L, a, b, X, Y, Z);
|
||||
//apply inverse gamma noise
|
||||
float r_, g_, b_;
|
||||
Color::xyz2rgb(X, Y, Z, r_, g_, b_, wip);
|
||||
float r_ = labdn->L[i1][j1];
|
||||
float g_ = labdn->a[i1][j1];
|
||||
float b_ = labdn->b[i1][j1];
|
||||
//inverse gamma standard (slider)
|
||||
r_ = r_ < 32768.f ? igamcurve[r_] : (Color::gammanf(r_ / 32768.f, igam) * 65535.f);
|
||||
g_ = g_ < 32768.f ? igamcurve[g_] : (Color::gammanf(g_ / 32768.f, igam) * 65535.f);
|
||||
|
@ -1771,10 +1771,10 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b,
|
||||
{
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat minvalfv = F2V(0.f);
|
||||
vfloat maxvalfv = F2V(MAXVALF);
|
||||
vfloat c500v = F2V(500.f);
|
||||
vfloat c200v = F2V(200.f);
|
||||
const vfloat minvalfv = ZEROV;
|
||||
const vfloat maxvalfv = F2V(MAXVALF);
|
||||
const vfloat c500v = F2V(500.f);
|
||||
const vfloat c200v = F2V(200.f);
|
||||
#endif
|
||||
int i = 0;
|
||||
|
||||
@ -1787,9 +1787,7 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b,
|
||||
const vfloat yv = F2V(wp[1][0]) * rv + F2V(wp[1][1]) * gv + F2V(wp[1][2]) * bv;
|
||||
const vfloat zv = F2V(wp[2][0]) * rv + F2V(wp[2][1]) * gv + F2V(wp[2][2]) * bv;
|
||||
|
||||
vmask maxMask = vmaskf_gt(vmaxf(xv, vmaxf(yv, zv)), maxvalfv);
|
||||
vmask minMask = vmaskf_lt(vminf(xv, vminf(yv, zv)), minvalfv);
|
||||
if (_mm_movemask_ps((vfloat)maxMask) || _mm_movemask_ps((vfloat)minMask)) {
|
||||
if (_mm_movemask_ps((vfloat)vorm(vmaskf_gt(vmaxf(xv, vmaxf(yv, zv)), maxvalfv), vmaskf_lt(vminf(xv, vminf(yv, zv)), minvalfv)))) {
|
||||
// take slower code path for all 4 pixels if one of the values is > MAXVALF or < 0. Still faster than non SSE2 version
|
||||
for(int k = 0; k < 4; ++k) {
|
||||
float x = xv[k];
|
||||
@ -1872,6 +1870,51 @@ void Color::RGB2L(float *R, float *G, float *B, float *L, const float wp[3][3],
|
||||
}
|
||||
}
|
||||
|
||||
// Converts `width` Lab values (arrays L, a, b) to RGB (arrays R, G, B).
// For every element whose squared chroma a*a + b*b exceeds `limit`, a is scaled by
// `afactor` and b by `bfactor` before the conversion.
// The conversion itself is Lab2XYZ followed by xyz2rgb using the matrix `wp`.
// At each index the inputs are read before the outputs are written, and the denoise
// caller passes the same buffers for input and output.
// When __SSE2__ is defined, elements are processed four at a time; the scalar loop
// at the end handles the remaining elements (or all of them without SSE2).
void Color::Lab2RGBLimit(float *L, float *a, float *b, float *R, float *G, float *B, const float wp[3][3], float limit, float afactor, float bfactor, int width)
|
||||
{
|
||||
|
||||
int i = 0;
|
||||
|
||||
#ifdef __SSE2__
|
||||
// vector copies of the matrix and the scalar parameters, built once before the loop
// NOTE(review): F2V presumably broadcasts one float to all vector lanes - confirm against the SSE helper header
const vfloat wpv[3][3] = {
|
||||
{F2V(wp[0][0]), F2V(wp[0][1]), F2V(wp[0][2])},
|
||||
{F2V(wp[1][0]), F2V(wp[1][1]), F2V(wp[1][2])},
|
||||
{F2V(wp[2][0]), F2V(wp[2][1]), F2V(wp[2][2])}
|
||||
};
|
||||
const vfloat limitv = F2V(limit);
|
||||
const vfloat afactorv = F2V(afactor);
|
||||
const vfloat bfactorv = F2V(bfactor);
|
||||
|
||||
// runs only while four elements starting at i are still inside [0, width)
for(;i < width - 3; i+=4) {
|
||||
// NOTE(review): LVFU / STVFU look like unaligned vector load / store - confirm
const vfloat Lv = LVFU(L[i]);
|
||||
vfloat av = LVFU(a[i]);
|
||||
vfloat bv = LVFU(b[i]);
|
||||
|
||||
// vector counterpart of the scalar `if` in the loop below
// NOTE(review): assumes vself(mask, x, y) yields x where the mask is set and y elsewhere - confirm
const vmask mask = vmaskf_gt(SQRV(av) + SQRV(bv), limitv);
|
||||
av = vself(mask, av * afactorv, av);
|
||||
bv = vself(mask, bv * bfactorv, bv);
|
||||
vfloat Xv, Yv, Zv;
|
||||
Lab2XYZ(Lv, av, bv, Xv, Yv, Zv);
|
||||
vfloat Rv, Gv, Bv;
|
||||
xyz2rgb(Xv, Yv, Zv, Rv, Gv, Bv, wpv);
|
||||
STVFU(R[i], Rv);
|
||||
STVFU(G[i], Gv);
|
||||
STVFU(B[i], Bv);
|
||||
}
|
||||
#endif
|
||||
// scalar loop: continues from wherever i was left above
for(;i < width; ++i) {
|
||||
float X, Y, Z;
|
||||
// local copies, so the scaling below does not modify a[] and b[] directly
float av = a[i];
|
||||
float bv = b[i];
|
||||
if (SQR(av) + SQR(bv) > limit) {
|
||||
av *= afactor;
|
||||
|
||||
bv *= bfactor;
|
||||
}
|
||||
Lab2XYZ(L[i], av, bv, X, Y, Z);
|
||||
xyz2rgb(X, Y, Z, R[i], G[i], B[i], wp);
|
||||
}
|
||||
}
|
||||
|
||||
void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b)
|
||||
{
|
||||
|
||||
|
@ -617,6 +617,7 @@ public:
|
||||
*/
|
||||
static void XYZ2Lab(float x, float y, float z, float &L, float &a, float &b);
|
||||
static void RGB2Lab(float *X, float *Y, float *Z, float *L, float *a, float *b, const float wp[3][3], int width);
|
||||
static void Lab2RGBLimit(float *L, float *a, float *b, float *R, float *G, float *B, const float wp[3][3], float limit, float afactor, float bfactor, int width);
|
||||
static void RGB2L(float *X, float *Y, float *Z, float *L, const float wp[3][3], int width);
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user