From d43ccc5e816f847ec153db612a94e061a24f05fe Mon Sep 17 00:00:00 2001 From: heckflosse Date: Mon, 31 Dec 2018 18:47:08 +0100 Subject: [PATCH] Add vceilf(x) --- rtengine/sleefsseavx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 83d937bd1..3000c1c10 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1427,5 +1427,21 @@ static INLINE void vconvertrgbrgbrgbrgb2rrrrggggbbbb (const float * src, vfloat bv = _mm_setr_ps(src[2],src[5],src[8],src[11]); } +#if defined( __SSE4_1__ ) && defined( __x86_64__ ) +static INLINE vfloat vceilf(vfloat x) { + return _mm_round_ps(x, _MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC); +} + +#else + +static INLINE vfloat vceilf(vfloat x) { + __m128i zerov = _mm_setzero_si128(); + zerov = _mm_cmpeq_epi32(zerov, zerov); + const vfloat onev = (vfloat)_mm_slli_epi32(_mm_srli_epi32(zerov, 25), 23); //create vector 1.0f + const vfloat xi = _mm_cvtepi32_ps(_mm_cvttps_epi32(x)); + return xi + _mm_and_ps(_mm_cmplt_ps(xi, x), onev); +} +#endif + #endif // __SSE2__ #endif // SLEEFSSEAVX