SSE: Review usage of vminf, vmaxf functions, #4942

This commit is contained in:
heckflosse
2018-11-05 15:59:41 +01:00
parent fa91103aec
commit 0983817434
8 changed files with 28 additions and 23 deletions

View File

@@ -320,7 +320,7 @@ public:
// Clamp and convert to integer values. Extract out of SSE register because all
// lookup operations use regular addresses.
vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv));
vfloat clampedIndexes = vmaxf(vminf(maxsv, indexv), ZEROV); // this automagically uses ZEROV in case indexv is NaN
vint indexes = _mm_cvttps_epi32(clampedIndexes);
int indexArray[4];
_mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes);
@@ -352,7 +352,7 @@ public:
// Clamp and convert to integer values. Extract out of SSE register because all
// lookup operations use regular addresses.
vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv));
vfloat clampedIndexes = vmaxf(vminf(maxsv, indexv), ZEROV); // this automagically uses ZEROV in case indexv is NaN
vint indexes = _mm_cvttps_epi32(clampedIndexes);
int indexArray[4];
_mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes);
@@ -372,7 +372,7 @@ public:
vfloat lower = _mm_castsi128_ps(_mm_unpacklo_epi64(temp0, temp1));
vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1));
vfloat diff = vmaxf(ZEROV, vminf(sizev, indexv)) - _mm_cvtepi32_ps(indexes);
vfloat diff = vmaxf(vminf(sizev, indexv), ZEROV) - _mm_cvtepi32_ps(indexes); // this automagically uses ZEROV in case indexv is NaN
return vintpf(diff, upper, lower);
}
@@ -383,7 +383,7 @@ public:
// Clamp and convert to integer values. Extract out of SSE register because all
// lookup operations use regular addresses.
vfloat clampedIndexes = vmaxf(ZEROV, vminf(maxsv, indexv));
vfloat clampedIndexes = vmaxf(vminf(maxsv, indexv), ZEROV); // this automagically uses ZEROV in case indexv is NaN
vint indexes = _mm_cvttps_epi32(clampedIndexes);
int indexArray[4];
_mm_storeu_si128(reinterpret_cast<__m128i*>(&indexArray[0]), indexes);
@@ -420,7 +420,8 @@ public:
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
vfloat operator[](vint idxv) const
{
vfloat tempv = vmaxf(ZEROV, vminf(sizev, _mm_cvtepi32_ps(idxv))); // convert to float because SSE2 has no min/max for 32bit integers
// convert to float because SSE2 has no min/max for 32bit integers
vfloat tempv = vmaxf(vminf(sizev, _mm_cvtepi32_ps(idxv)), ZEROV); // this automagically uses ZEROV in case idxv is NaN (which will never happen because it is a vector of int)
idxv = _mm_cvttps_epi32(tempv);
// access the LUT 4 times. Trust the compiler. It generates good code here, better than hand written SSE code
return _mm_setr_ps(data[_mm_cvtsi128_si32(idxv)],