From 204475dd05576f9e2d4b8b8d8e443603f1f3d9aa Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 23 Sep 2019 13:42:23 +0200 Subject: [PATCH] sleefsseavx: added horizontal min and max --- rtengine/sleefsseavx.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 3000c1c10..cce88df5d 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1390,6 +1390,18 @@ static inline float vhadd( vfloat a ) { return _mm_cvtss_f32(_mm_add_ss(a, _mm_shuffle_ps(a, a, 1))); } +static inline float vhmin(vfloat a) { + // returns min(a[0], a[1], a[2], a[3]) + a = vminf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vminf(a, _mm_shuffle_ps(a, a, 1))); +} + +static inline float vhmax(vfloat a) { + // returns max(a[0], a[1], a[2], a[3]) + a = vmaxf(a, _mm_movehl_ps(a, a)); + return _mm_cvtss_f32(vmaxf(a, _mm_shuffle_ps(a, a, 1))); +} + static INLINE vfloat vmul2f(vfloat a){ // fastest way to multiply by 2 return a + a;