From 3567d54b526fcd6d6d63998714a1b056816f7786 Mon Sep 17 00:00:00 2001
From: Ingo Weyrich
Date: Wed, 1 Dec 2021 15:01:27 +0100
Subject: [PATCH] faster sigmoid function to create the contrast mask used in dual demosaic and capture sharpening, #6386 (#6387)

---
 rtengine/rt_algo.cc | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc
index e4ae84a46..ff4c572fa 100644
--- a/rtengine/rt_algo.cc
+++ b/rtengine/rt_algo.cc
@@ -35,14 +35,24 @@
 
 namespace {
 
-template<typename T>
-T calcBlendFactor(T val, T threshold) {
+float calcBlendFactor(float val, float threshold) {
     // sigmoid function
     // result is in ]0;1] range
     // inflexion point is at (x, y) (threshold, 0.5)
-    return 1.f / (1.f + xexpf(16.f - (16.f / threshold) * val));
+    const float x = -16.f + (16.f / threshold) * val;
+    return 0.5f * (1.f + x / std::sqrt(1.f + rtengine::SQR(x)));
 }
 
+#ifdef __SSE2__
+vfloat calcBlendFactor(vfloat val, vfloat threshold) {
+    // sigmoid function
+    // result is in ]0;1] range
+    // inflexion point is at (x, y) (threshold, 0.5)
+    const vfloat x = -16.f + (16.f / threshold) * val;
+    return 0.5f * (1.f + x * _mm_rsqrt_ps(1.f + rtengine::SQR(x)));
+}
+#endif
+
 float tileAverage(const float * const *data, size_t tileY, size_t tileX, size_t tilesize) {
 
     float avg = 0.f;