From b1d673a2ba77c31e57961cca5daf53873aa55289 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Sat, 17 Feb 2018 20:52:00 +0100 Subject: [PATCH] Fix bug in vectorized lut access, fixes #4392 --- rtengine/LUT.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/rtengine/LUT.h b/rtengine/LUT.h index b2d11c234..d4d2a91f8 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -97,7 +97,6 @@ protected: float maxsf; // For the SSE routine operator[](vfloat), we just clip float lookup values // to just below the max value. - float maxIndexFloat; T * data; unsigned int clip; unsigned int size; @@ -135,7 +134,6 @@ public: upperBound = size - 1; maxs = size - 2; maxsf = (float)maxs; - maxIndexFloat = ((float)upperBound) - 1e-5; #ifdef __SSE2__ maxsv = F2V( maxs ); sizeiv = _mm_set1_epi32( (int)(size - 1) ); @@ -166,7 +164,6 @@ public: upperBound = size - 1; maxs = size - 2; maxsf = (float)maxs; - maxIndexFloat = ((float)upperBound) - 1e-5; #ifdef __SSE2__ maxsv = F2V( maxs ); sizeiv = _mm_set1_epi32( (int)(size - 1) ); @@ -242,7 +239,6 @@ public: this->upperBound = rhs.upperBound; this->maxs = this->size - 2; this->maxsf = (float)this->maxs; - this->maxIndexFloat = ((float)this->upperBound) - 1e-5; #ifdef __SSE2__ this->maxsv = F2V( this->size - 2); this->sizeiv = _mm_set1_epi32( (int)(this->size - 1) ); @@ -317,7 +313,7 @@ public: // Clamp and convert to integer values. Extract out of SSE register because all // lookup operations use regular addresses. - vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxIndexFloat), indexv)); + vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv)); vint indexes = _mm_cvttps_epi32(clampedIndexes); int indexArray[4]; _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes); @@ -349,7 +345,7 @@ public: // Clamp and convert to integer values. Extract out of SSE register because all // lookup operations use regular addresses. - vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxIndexFloat), indexv)); + vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv)); vint indexes = _mm_cvttps_epi32(clampedIndexes); int indexArray[4]; _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes); @@ -369,7 +365,7 @@ public: vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1)); vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1)); - vfloat diff = clampedIndexes - _mm_cvtepi32_ps(indexes); + vfloat diff = vmaxf(ZEROV, vminf(sizev, indexv)) - _mm_cvtepi32_ps(indexes); return vintpf(diff, upper, lower); } @@ -380,7 +376,7 @@ public: // Clamp and convert to integer values. Extract out of SSE register because all // lookup operations use regular addresses. - vfloat clampedIndexes = vmaxf(ZEROV, vminf(F2V(maxsf), indexv)); + vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv)); vint indexes = _mm_cvttps_epi32(clampedIndexes); int indexArray[4]; _mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes); @@ -587,7 +583,6 @@ public: maxs = 0; maxsf = 0.f; clip = 0; - maxIndexFloat = ((float)upperBound) - 1e-5; } // create an identity LUT (LUT(x) = x) or a scaled identity LUT (LUT(x) = x / divisor) @@ -697,7 +692,6 @@ public: upperBound = size - 1; maxs = size - 2; maxsf = (float)maxs; - maxIndexFloat = ((float)upperBound) - 1e-5; #ifdef __SSE2__ maxsv = F2V( size - 2); sizeiv = _mm_set1_epi32( (int)(size - 1) );