Merge pull request #5146 from Beep6581/fix-non-sse-standard-tonecurve
Fix standard tonecurve for non sse builds, also inluced speedup for s…
This commit is contained in:
commit
f84a75de1e
@ -49,8 +49,7 @@ namespace rtengine
|
|||||||
class ToneCurve;
|
class ToneCurve;
|
||||||
class ColorAppearance;
|
class ColorAppearance;
|
||||||
|
|
||||||
template <typename T>
|
inline void setUnlessOOG(float &r, float &g, float &b, const float &rr, const float &gg, const float &bb)
|
||||||
void setUnlessOOG(T &r, T &g, T &b, const T &rr, const T &gg, const T &bb)
|
|
||||||
{
|
{
|
||||||
if (!OOG(r) || !OOG(g) || !OOG(b)) {
|
if (!OOG(r) || !OOG(g) || !OOG(b)) {
|
||||||
r = rr;
|
r = rr;
|
||||||
@ -59,6 +58,22 @@ void setUnlessOOG(T &r, T &g, T &b, const T &rr, const T &gg, const T &bb)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __SSE2__
|
||||||
|
inline vmask OOG(const vfloat val)
|
||||||
|
{
|
||||||
|
return vorm(vmaskf_lt(val, ZEROV), vmaskf_gt(val, F2V(65535.f)));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
inline void setUnlessOOG(vfloat &r, vfloat &g, vfloat &b, const vfloat rr, const vfloat gg, const vfloat bb)
|
||||||
|
{
|
||||||
|
vmask cond = vandm(vandm(OOG(r), OOG(g)), OOG(b));
|
||||||
|
r = vself(cond, r, rr);
|
||||||
|
g = vself(cond, g, gg);
|
||||||
|
b = vself(cond, b, bb);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
bool sanitizeCurve(std::vector<double>& curve);
|
bool sanitizeCurve(std::vector<double>& curve);
|
||||||
|
|
||||||
namespace curves {
|
namespace curves {
|
||||||
@ -967,33 +982,24 @@ inline void StandardToneCurve::BatchApply(
|
|||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
curves::setLutVal(lutToneCurve, r[i], g[i], b[i]);
|
setUnlessOOG(r[i], g[i], b[i], lutToneCurve[r[i]], lutToneCurve[g[i]], lutToneCurve[b[i]]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
float tmpr[4] ALIGNED16;
|
|
||||||
float tmpg[4] ALIGNED16;
|
|
||||||
float tmpb[4] ALIGNED16;
|
|
||||||
// float mv = lutToneCurve[MAXVALF];
|
|
||||||
for (; i + 3 < end; i += 4) {
|
for (; i + 3 < end; i += 4) {
|
||||||
__m128 r_val = LVF(r[i]);
|
vfloat r_val = LVF(r[i]);
|
||||||
__m128 g_val = LVF(g[i]);
|
vfloat g_val = LVF(g[i]);
|
||||||
__m128 b_val = LVF(b[i]);
|
vfloat b_val = LVF(b[i]);
|
||||||
STVF(tmpr[0], lutToneCurve[r_val]);
|
setUnlessOOG(r_val, g_val, b_val, lutToneCurve[r_val], lutToneCurve[g_val], lutToneCurve[b_val]);
|
||||||
STVF(tmpg[0], lutToneCurve[g_val]);
|
STVF(r[i], r_val);
|
||||||
STVF(tmpb[0], lutToneCurve[b_val]);
|
STVF(g[i], g_val);
|
||||||
for (int j = 0; j < 4; ++j) {
|
STVF(b[i], b_val);
|
||||||
setUnlessOOG(r[i+j], g[i+j], b[i+j], tmpr[j], tmpg[j], tmpb[j]);
|
|
||||||
// curves::setLutVal(r[i+j], tmpr[j], mv);
|
|
||||||
// curves::setLutVal(g[i+j], tmpg[j], mv);
|
|
||||||
// curves::setLutVal(b[i+j], tmpb[j], mv);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remainder in non-SSE.
|
// Remainder in non-SSE.
|
||||||
for (; i < end; ++i) {
|
for (; i < end; ++i) {
|
||||||
curves::setLutVal(lutToneCurve, r[i], g[i], b[i]);
|
setUnlessOOG(r[i], g[i], b[i], lutToneCurve[r[i]], lutToneCurve[g[i]], lutToneCurve[b[i]]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@ -1153,10 +1159,6 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
|
|||||||
const vfloat zd5v = F2V(0.5f);
|
const vfloat zd5v = F2V(0.5f);
|
||||||
const vfloat zd25v = F2V(0.25f);
|
const vfloat zd25v = F2V(0.25f);
|
||||||
|
|
||||||
float tmpr[4] ALIGNED16;
|
|
||||||
float tmpg[4] ALIGNED16;
|
|
||||||
float tmpb[4] ALIGNED16;
|
|
||||||
|
|
||||||
for (; i + 3 < end; i += 4) {
|
for (; i + 3 < end; i += 4) {
|
||||||
vfloat r_val = vclampf(LVF(r[i]), ZEROV, c65535v);
|
vfloat r_val = vclampf(LVF(r[i]), ZEROV, c65535v);
|
||||||
vfloat g_val = vclampf(LVF(g[i]), ZEROV, c65535v);
|
vfloat g_val = vclampf(LVF(g[i]), ZEROV, c65535v);
|
||||||
@ -1173,12 +1175,16 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
|
|||||||
vfloat r3 = Triangle(b_val, b3, r_val);
|
vfloat r3 = Triangle(b_val, b3, r_val);
|
||||||
vfloat g3 = Triangle(b_val, b3, g_val);
|
vfloat g3 = Triangle(b_val, b3, g_val);
|
||||||
|
|
||||||
STVF(tmpr[0], vclampf(r1 * zd5v + r2 * zd25v + r3 * zd25v, ZEROV, c65535v));
|
vfloat r_old = LVF(r[i]);
|
||||||
STVF(tmpg[0], vclampf(g1 * zd25v + g2 * zd5v + g3 * zd25v, ZEROV, c65535v));
|
vfloat g_old = LVF(g[i]);
|
||||||
STVF(tmpb[0], vclampf(b1 * zd25v + b2 * zd25v + b3 * zd5v, ZEROV, c65535v));
|
vfloat b_old = LVF(b[i]);
|
||||||
for (int j = 0; j < 4; ++j) {
|
vfloat r_new = vclampf(r1 * zd5v + r2 * zd25v + r3 * zd25v, ZEROV, c65535v);
|
||||||
setUnlessOOG(r[i+j], g[i+j], b[i+j], tmpr[j], tmpg[j], tmpb[j]);
|
vfloat g_new = vclampf(g1 * zd25v + g2 * zd5v + g3 * zd25v, ZEROV, c65535v);
|
||||||
}
|
vfloat b_new = vclampf(b1 * zd25v + b2 * zd25v + b3 * zd5v, ZEROV, c65535v);
|
||||||
|
setUnlessOOG(r_old, g_old, b_old, r_new, g_new, b_new);
|
||||||
|
STVF(r[i], r_old);
|
||||||
|
STVF(g[i], g_old);
|
||||||
|
STVF(b[i], b_old);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remainder in non-SSE.
|
// Remainder in non-SSE.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user