Colortoning lab regions: vectorized last loop, #4914

This commit is contained in:
heckflosse 2018-11-02 23:05:40 +01:00
parent d07d06a885
commit 240f1eac65

View File

@ -182,27 +182,59 @@ BENCHFUN
} }
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for if (multiThread) #pragma omp parallel if (multiThread)
#endif #endif
for (int y = 0; y < lab->H; ++y) { {
for (int x = 0; x < lab->W; ++x) { #ifdef __SSE2__
float l = lab->L[y][x]; vfloat c42000v = F2V(42000.f);
float a = lab->a[y][x]; vfloat cm42000v = F2V(-42000.f);
float b = lab->b[y][x]; vfloat c32768v = F2V(32768.f);
#endif
#ifdef _OPENMP
#pragma omp for
#endif
for (int y = 0; y < lab->H; ++y) {
int x = 0;
#ifdef __SSE2__
for (; x < lab->W - 3; x += 4) {
vfloat lv = LVFU(lab->L[y][x]);
vfloat av = LVFU(lab->a[y][x]);
vfloat bv = LVFU(lab->b[y][x]);
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
float blend = abmask[i][y][x]; vfloat blendv = LVFU(abmask[i][y][x]);
float s = rs[i]; vfloat sv = F2V(rs[i]);
float a_new = LIM(s * (a + abca[i]), -42000.f, 42000.f); vfloat a_newv = LIMV(sv * (av + F2V(abca[i])), cm42000v, c42000v);
float b_new = LIM(s * (b + abcb[i]), -42000.f, 42000.f); vfloat b_newv = LIMV(sv * (bv + F2V(abcb[i])), cm42000v, c42000v);
float l_new = LIM(l * rl[i], 0.f, 32768.f); vfloat l_newv = LIMV(lv * F2V(rl[i]), ZEROV, c32768v);
l = intp(Lmask[i][y][x], l_new, l); lv = vintpf(LVFU(Lmask[i][y][x]), l_newv, lv);
a = intp(blend, a_new, a); av = vintpf(blendv, a_newv, av);
b = intp(blend, b_new, b); bv = vintpf(blendv, b_newv, bv);
}
STVFU(lab->L[y][x], lv);
STVFU(lab->a[y][x], av);
STVFU(lab->b[y][x], bv);
}
#endif
for (; x < lab->W; ++x) {
float l = lab->L[y][x];
float a = lab->a[y][x];
float b = lab->b[y][x];
for (int i = 0; i < n; ++i) {
float blend = abmask[i][y][x];
float s = rs[i];
float a_new = LIM(s * (a + abca[i]), -42000.f, 42000.f);
float b_new = LIM(s * (b + abcb[i]), -42000.f, 42000.f);
float l_new = LIM(l * rl[i], 0.f, 32768.f);
l = intp(Lmask[i][y][x], l_new, l);
a = intp(blend, a_new, a);
b = intp(blend, b_new, b);
}
lab->L[y][x] = l;
lab->a[y][x] = a;
lab->b[y][x] = b;
} }
lab->L[y][x] = l;
lab->a[y][x] = a;
lab->b[y][x] = b;
} }
} }
} }