Code review changes
This commit is contained in:
@@ -95,7 +95,8 @@ protected:
|
|||||||
// list of variables ordered to improve cache speed
|
// list of variables ordered to improve cache speed
|
||||||
unsigned int maxs;
|
unsigned int maxs;
|
||||||
float maxsf;
|
float maxsf;
|
||||||
// possibly-more-correct value for sse routine (see unit test for details)
|
// For the SSE routine operator[](vfloat), we clip float lookup values
|
||||||
|
// to just below the max value.
|
||||||
float maxIndexFloat;
|
float maxIndexFloat;
|
||||||
T * data;
|
T * data;
|
||||||
unsigned int clip;
|
unsigned int clip;
|
||||||
@@ -125,7 +126,10 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
dirty = true;
|
dirty = true;
|
||||||
clip = flags;
|
clip = flags;
|
||||||
data = new T[s];
|
// Allocate 3 extra elements so operator[](vfloat) won't access out-of-bounds memory.
|
||||||
|
// The routine would still produce the right answer, but might cause issues
|
||||||
|
// with address/heap checking programs.
|
||||||
|
data = new T[s + 3];
|
||||||
owner = 1;
|
owner = 1;
|
||||||
size = s;
|
size = s;
|
||||||
upperBound = size - 1;
|
upperBound = size - 1;
|
||||||
@@ -155,7 +159,8 @@ public:
|
|||||||
|
|
||||||
dirty = true; // Assumption!
|
dirty = true; // Assumption!
|
||||||
clip = flags;
|
clip = flags;
|
||||||
data = new T[s];
|
// Allocate 3 extra elements for operator[](vfloat); see the comment in the constructor.
|
||||||
|
data = new T[s + 3];
|
||||||
owner = 1;
|
owner = 1;
|
||||||
size = s;
|
size = s;
|
||||||
upperBound = size - 1;
|
upperBound = size - 1;
|
||||||
@@ -222,7 +227,8 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (this->data == nullptr) {
|
if (this->data == nullptr) {
|
||||||
this->data = new T[rhs.size];
|
// Allocate 3 extra elements for operator[](vfloat); see the comment in the constructor.
|
||||||
|
this->data = new T[rhs.size + 3];
|
||||||
}
|
}
|
||||||
|
|
||||||
this->clip = rhs.clip;
|
this->clip = rhs.clip;
|
||||||
@@ -327,7 +333,7 @@ public:
|
|||||||
vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1));
|
vfloat upper = _mm_castsi128_ps(_mm_unpackhi_epi64(temp0, temp1));
|
||||||
|
|
||||||
vfloat diff = clampedIndexes - _mm_cvtepi32_ps(indexes);
|
vfloat diff = clampedIndexes - _mm_cvtepi32_ps(indexes);
|
||||||
return (_mm_set1_ps(1.0f) - diff) * lower + (diff * upper);
|
return vintpf(diff, upper, lower);
|
||||||
}
|
}
|
||||||
#ifdef __SSE4_1__
|
#ifdef __SSE4_1__
|
||||||
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
||||||
@@ -426,9 +432,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
idx = 0;
|
idx = 0;
|
||||||
// Note: Maybe this should be 'idx > maxsf'? See unit test where a LUT with
|
} else if (idx > maxs) {
|
||||||
// values [10, 11, 12, 13] gets looked up at 2.5 and returns 12.5.
|
|
||||||
} else if (index > maxsf) {
|
|
||||||
if (clip & LUT_CLIP_ABOVE) {
|
if (clip & LUT_CLIP_ABOVE) {
|
||||||
return data[upperBound];
|
return data[upperBound];
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user