From c67b986744d21b803de17dba71f1d82d6bc38a7e Mon Sep 17 00:00:00 2001
From: heckflosse <heckflosse67@gmx.de>
Date: Wed, 21 Sep 2016 00:22:42 +0200
Subject: [PATCH] add faster implementation to clip float to [0;65535] and
 round

---
 rtengine/rt_math.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h
index 0836c8be7..b5c93d127 100644
--- a/rtengine/rt_math.h
+++ b/rtengine/rt_math.h
@@ -80,7 +80,7 @@ inline _Tp intp(_Tp a, _Tp b, _Tp c)
     // following is valid:
     // intp(a, b+x, c+x) = intp(a, b, c) + x
     // intp(a, b*x, c*x) = intp(a, b, c) * x
-    return a * (b-c) + c;
+    return a * (b - c) + c;
 }
 
 template<typename T>
@@ -101,5 +101,17 @@ inline T norminf(const T& x, const T& y)
     return std::max(std::abs(x), std::abs(y));
 }
 
-}
+inline int float2uint16range(float d) // clips input to [0;65535] and returns it rounded to an integer in that range
+{
+    d = CLIP(d); // clip to [0;65535]
+#ifdef __SSE2__ // this only works in IEEE 754 maths. For simplicity it is restricted to SSE2. It can be enhanced later, but endianness has to be taken care of then.
+    d += 12582912.f; // 1.5 * 2^23: the sum has an ulp of 1, so the FPU rounds it (ties to even under the default rounding mode, unlike std::round) and the result sits in the low mantissa bits
+    return reinterpret_cast<int&>(d) & 0xFFFF; // bit pattern is 0x4B400000 + rounded value; mask off the bias so the int itself is in [0;65535]. NOTE(review): the cast is a type pun, std::memcpy would be aliasing-safe
+#else // fall back to slow std::round()
+    return std::round(d);
+#endif
+}
+
+}
+
 #endif