Minor cleanups

This commit is contained in:
Flössie
2020-07-02 17:38:59 +02:00
parent cf0ccc26ca
commit af4b17ac84
2 changed files with 46 additions and 44 deletions

View File

@@ -58,6 +58,7 @@
#pragma once
#include <algorithm>
#include <cstring>
#include <cstdint>
#include <cassert>
@@ -98,8 +99,8 @@ protected:
unsigned int size;
unsigned int upperBound; // always equals size-1, parameter created for performance reason
private:
unsigned int owner;
#ifdef __SSE2__
unsigned int owner;
alignas(16) vfloat maxsv;
alignas(16) vfloat sizev;
alignas(16) vint sizeiv;
@@ -140,35 +141,30 @@ public:
}
}
LUT(const std::vector<T> input, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE)
LUT(const std::vector<T>& input, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE) :
maxs(input.size() - 2),
maxsf(maxs),
data(new T[input.size() + 3]), // Add a few extra elements so [](vfloat) won't access out-of-bounds memory.
clip(flags),
size(input.size()),
upperBound(size - 1),
owner(1),
#ifdef __SSE2__
maxsv(F2V(maxs)),
sizev(F2V(size - 1)),
sizeiv(_mm_set1_epi32(size - 1)),
#endif
dirty(true)
{
#ifndef NDEBUG
if (input.size() <= 0) {
printf("s<=0!\n");
if (input.empty()) {
printf("s=0!\n");
}
assert (input.size() > 0);
assert(!input.empty());
#endif
dirty = true;
clip = flags;
// Add a few extra elements so [](vfloat) won't access out-of-bounds memory.
// The routine would still produce the right answer, but might cause issues
// with address/heap checking programs.
data = new T[input.size() + 3];
owner = 1;
size = input.size();
upperBound = size - 1;
maxs = size - 2;
maxsf = (float)maxs;
#ifdef __SSE2__
maxsv = F2V( maxs );
sizeiv = _mm_set1_epi32( (int)(size - 1) );
sizev = F2V( size - 1 );
#endif
for (size_t i = 0; i < input.size(); ++i) {
data[i] = input[i];
}
std::copy_n(input.begin(), input.size(), data);
}
void operator ()(int s, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE, bool initZero = false)
@@ -256,7 +252,7 @@ public:
return size > 0 ? upperBound : 0;
}
LUT<T> & operator=(const LUT<T>& rhs)
LUT<T>& operator=(const LUT<T>& rhs)
{
if (this != &rhs) {
if (rhs.size > this->size) {
@@ -287,7 +283,7 @@ public:
}
// handy to sum up per thread histograms. #pragma omp simd speeds up the loop by about factor 3 for LUTu (uint32_t).
LUT<T> & operator+=(const LUT<T>& rhs)
LUT<T>& operator+=(const LUT<T>& rhs)
{
if (rhs.size == this->size) {
#ifdef _OPENMP
@@ -304,7 +300,7 @@ public:
// multiply all elements of LUT<float> with a constant float value
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
LUT<float> & operator*=(float factor)
LUT<float>& operator*=(float factor)
{
#ifdef _OPENMP
#pragma omp simd
@@ -319,7 +315,7 @@ public:
// divide all elements of LUT<float> by a constant float value
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
LUT<float> & operator/=(float divisor)
LUT<float>& operator/=(float divisor)
{
#ifdef _OPENMP
#pragma omp simd
@@ -489,7 +485,7 @@ public:
// Return the value for "index" that is in the [0-1] range.
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
T getVal01 (float index) const
T getVal01(float index) const
{
index *= (float)upperBound;
int idx = (int)index; // don't use floor! The difference in negative space is no problems here
@@ -514,19 +510,19 @@ public:
return (p1 + p2 * diff);
}
operator bool (void) const
operator bool() const // FIXME: Should be explicit
{
return size > 0;
}
void clear(void)
void clear()
{
if (data && size) {
memset(data, 0, size * sizeof(T));
}
}
void reset(void)
void reset()
{
if (data) {
delete[] data;