Colortoning lab regions: vectorized last loop, #4914

This commit is contained in:
heckflosse 2018-11-02 23:05:40 +01:00
parent d07d06a885
commit 240f1eac65

View File

@@ -182,10 +182,41 @@ BENCHFUN
} }
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for if (multiThread) #pragma omp parallel if (multiThread)
#endif
{
#ifdef __SSE2__
vfloat c42000v = F2V(42000.f);
vfloat cm42000v = F2V(-42000.f);
vfloat c32768v = F2V(32768.f);
#endif
#ifdef _OPENMP
#pragma omp for
#endif #endif
for (int y = 0; y < lab->H; ++y) { for (int y = 0; y < lab->H; ++y) {
for (int x = 0; x < lab->W; ++x) { int x = 0;
#ifdef __SSE2__
for (; x < lab->W - 3; x += 4) {
vfloat lv = LVFU(lab->L[y][x]);
vfloat av = LVFU(lab->a[y][x]);
vfloat bv = LVFU(lab->b[y][x]);
for (int i = 0; i < n; ++i) {
vfloat blendv = LVFU(abmask[i][y][x]);
vfloat sv = F2V(rs[i]);
vfloat a_newv = LIMV(sv * (av + F2V(abca[i])), cm42000v, c42000v);
vfloat b_newv = LIMV(sv * (bv + F2V(abcb[i])), cm42000v, c42000v);
vfloat l_newv = LIMV(lv * F2V(rl[i]), ZEROV, c32768v);
lv = vintpf(LVFU(Lmask[i][y][x]), l_newv, lv);
av = vintpf(blendv, a_newv, av);
bv = vintpf(blendv, b_newv, bv);
}
STVFU(lab->L[y][x], lv);
STVFU(lab->a[y][x], av);
STVFU(lab->b[y][x], bv);
}
#endif
for (; x < lab->W; ++x) {
float l = lab->L[y][x]; float l = lab->L[y][x];
float a = lab->a[y][x]; float a = lab->a[y][x];
float b = lab->b[y][x]; float b = lab->b[y][x];
@@ -206,5 +237,6 @@ BENCHFUN
} }
} }
} }
}
} // namespace rtengine } // namespace rtengine