Capture sharpening: small speedup

This commit is contained in:
Ingo Weyrich 2019-08-15 23:23:28 +02:00
parent 8421f8780a
commit e4b955523e
2 changed files with 58 additions and 10 deletions

View File

@ -1806,9 +1806,37 @@ public:
// Converts one RGB pixel to a luma value Y and two colour-difference values Cb, Cr,
// written through the reference parameters.
//   Y  = 0.2627 * R + 0.6780 * G + 0.0593 * B
//   Cb = B - Y and Cr = R - Y, expanded in terms of R, G and B.
// The inputs are used as passed; this overload does no clamping.
// Each line below carries the pre-change and post-change text of a side-by-side diff;
// the right-hand form groups the subtracted terms in parentheses.
static inline void RGB2YCbCr(float R, float G, float B, float &Y, float &Cb, float &Cr) { static inline void RGB2YCbCr(float R, float G, float B, float &Y, float &Cb, float &Cr) {
Y = 0.2627f * R + 0.6780f * G + 0.0593f * B; Y = 0.2627f * R + 0.6780f * G + 0.0593f * B;
Cb = -0.2627f * R - 0.6780f * G + (1.f - 0.0593f) * B; Cb = (1.f - 0.0593f) * B - (0.2627f * R + 0.6780f * G);
Cr = (1.f - 0.2627f) * R - 0.6780f * G - 0.0593f * B; Cr = (1.f - 0.2627f) * R - (0.6780f * G + 0.0593f * B);
}
// Row-wise RGB -> Y/Cb/Cr conversion over W consecutive pixels.
//
// R, G, B   : input rows; each sample is clamped to >= 0 before use.
// Y         : output row; receives the weighted sum of the clamped inputs raised to 1 / gamma.
// Cb, Cr    : output rows; computed from the clamped inputs with the un-powered weights,
//             i.e. they are not affected by gamma.
// gamma     : exponent whose reciprocal is applied to Y (must be non-zero for 1 / gamma).
// W         : number of samples to process in each row.
//
// With __SSE2__ defined, groups of four samples are processed first; the trailing
// scalar loop handles whatever remains (or the whole row without SSE2).
static inline void RGB2YCbCr(float* R, float* G, float* B, float* Y, float* Cb, float *Cr, float gamma, int W) {
// From here on 'gamma' holds the reciprocal, so pow_F(x, gamma) computes x^(1/gamma).
gamma = 1.f / gamma;
// 'i' is shared by both loops so the scalar loop resumes where the vector loop stopped.
int i = 0;
#ifdef __SSE2__
// Broadcast the exponent and the weights once, outside the loop.
const vfloat gammav = F2V(gamma);
const vfloat c1v = F2V(0.2627f);
const vfloat c2v = F2V(0.6780f);
const vfloat c3v = F2V(0.0593f);
const vfloat c4v = F2V(1.f - 0.0593f);
const vfloat c5v = F2V(1.f - 0.2627f);
// 'i < W - 3' guarantees samples i .. i + 3 are all inside the row.
for (; i < W - 3; i += 4) {
// NOTE(review): if vmaxf follows SSE max semantics, a NaN input becomes 0 here but
// stays NaN through std::max in the scalar loop below - confirm against vmaxf's definition.
const vfloat Rv = vmaxf(LVFU(R[i]), ZEROV);
const vfloat Gv = vmaxf(LVFU(G[i]), ZEROV);
const vfloat Bv = vmaxf(LVFU(B[i]), ZEROV);
STVFU(Y[i], pow_F(c1v * Rv + c2v * Gv + c3v * Bv, gammav));
STVFU(Cb[i], c4v * Bv - (c1v * Rv + c2v * Gv));
STVFU(Cr[i], c5v * Rv - (c2v * Gv + c3v * Bv));
}
#endif
// Scalar path: same arithmetic as above, one sample at a time.
for (; i < W; ++i) {
const float r = std::max(R[i], 0.f);
const float g = std::max(G[i], 0.f);
const float b = std::max(B[i], 0.f);
Y[i] = pow_F(0.2627f * r + 0.6780f * g + 0.0593f * b, gamma);
Cb[i] = (1.f - 0.0593f) * b - (0.2627f * r + 0.6780f * g);
Cr[i] = (1.f - 0.2627f) * r - (0.6780f * g + 0.0593f * b);
}
} }
static inline void YCbCr2RGB(float Y, float Cb, float Cr, float &R, float &G, float &B) { static inline void YCbCr2RGB(float Y, float Cb, float Cr, float &R, float &G, float &B) {
@ -1816,6 +1844,32 @@ public:
G = std::max(Y - (0.0593f / 0.6780f) * Cb - (0.2627f / 0.6780f) * Cr, 0.f); G = std::max(Y - (0.0593f / 0.6780f) * Cb - (0.2627f / 0.6780f) * Cr, 0.f);
B = std::max(Y + Cb, 0.f); B = std::max(Y + Cb, 0.f);
} }
static inline void YCbCr2RGB(float* Y, float* Cb, float* Cr, float* R, float* G, float* B, float gamma, int W) {
int i = 0;
#ifdef __SSE2__
const vfloat gammav = F2V(gamma);
const vfloat c1v = F2V(0.0593f / 0.6780f);
const vfloat c2v = F2V(0.2627f / 0.6780f);
for (; i < W - 3; i += 4) {
const vfloat Yv = pow_F(LVFU(Y[i]), gammav);
const vfloat Crv = LVFU(Cr[i]);
const vfloat Cbv = LVFU(Cb[i]);
STVFU(R[i], vmaxf(Yv + Crv, ZEROV));
STVFU(G[i], vmaxf(Yv - c1v * Cbv - c2v * Crv, ZEROV));
STVFU(B[i], vmaxf(Yv + Cbv, ZEROV));
}
#endif
for (; i < W; ++i) {
const float y = pow_F(Y[i], gamma);
const float cr = Cr[i];
const float cb = Cb[i];
R[i] = std::max(y + cr, 0.f);
G[i] = std::max(y - (0.0593f / 0.6780f) * cb - (0.2627f / 0.6780f) * cr, 0.f);
B[i] = std::max(y + cb, 0.f);
}
}
}; };
} }

View File

@ -4998,10 +4998,7 @@ BENCHFUN
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < H; ++i) { for (int i = 0; i < H; ++i) {
Color::RGB2L(red[i], green[i], blue[i], L[i], xyz_rgb, W); Color::RGB2L(red[i], green[i], blue[i], L[i], xyz_rgb, W);
for (int j = 0; j < W; ++j) { Color::RGB2YCbCr(red[i], green[i], blue[i], Y[i], Cb[i], Cr[i], gamma, W);
Color::RGB2YCbCr(std::max(red[i][j], 0.f), std::max(green[i][j], 0.f), std::max(blue[i][j], 0.f), Y[i][j], Cb[i][j], Cr[i][j]);
Y[i][j] = pow_F(Y[i][j], 1.f / gamma);
}
} }
// calculate contrast based blend factors to reduce sharpening in regions with low contrast // calculate contrast based blend factors to reduce sharpening in regions with low contrast
JaggedArray<float> blend(W, H); JaggedArray<float> blend(W, H);
@ -5015,10 +5012,7 @@ BENCHFUN
StopWatch Stop2("Y2RGB"); StopWatch Stop2("Y2RGB");
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < H; ++i) { for (int i = 0; i < H; ++i) {
for (int j = 0; j < W ; ++j) { Color::YCbCr2RGB(Y[i], Cb[i], Cr[i], red[i], green[i], blue[i], gamma, W);
Y[i][j] = pow_F(Y[i][j], gamma);
Color::YCbCr2RGB(Y[i][j], Cb[i][j], Cr[i][j], red[i][j], green[i][j], blue[i][j]);
}
} }
Stop2.stop(); Stop2.stop();
} }