diff --git a/rtengine/LUT.h b/rtengine/LUT.h index 933fdba3a..c426bbaaa 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -58,6 +58,7 @@ #pragma once +#include #include #include #include @@ -140,35 +141,30 @@ public: } } - LUT(const std::vector input, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE) + LUT(const std::vector& input, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE) : + maxs(input.size() - 2), + maxsf(maxs), + data(new T[input.size() + 3]), // Add a few extra elements so [](vfloat) won't access out-of-bounds memory. + clip(flags), + size(input.size()), + upperBound(size - 1), + owner(1), +#ifdef __SSE2__ + maxsv(F2V(maxs)), + sizev(F2V(size - 1)), + sizeiv(_mm_set1_epi32(size - 1)), +#endif + dirty(true) { #ifndef NDEBUG - if (input.size() <= 0) { - printf("s<=0!\n"); + if (input.empty()) { + printf("s=0!\n"); } - assert (input.size() > 0); + assert(!input.empty()); #endif - dirty = true; - clip = flags; - // Add a few extra elements so [](vfloat) won't access out-of-bounds memory. - // The routine would still produce the right answer, but might cause issues - // with address/heap checking programs. - data = new T[input.size() + 3]; - owner = 1; - size = input.size(); - upperBound = size - 1; - maxs = size - 2; - maxsf = (float)maxs; -#ifdef __SSE2__ - maxsv = F2V( maxs ); - sizeiv = _mm_set1_epi32( (int)(size - 1) ); - sizev = F2V( size - 1 ); -#endif - for (size_t i = 0; i < input.size(); ++i) { - data[i] = input[i]; - } + std::copy_n(input.begin(), input.size(), data); } void operator ()(int s, int flags = LUT_CLIP_BELOW | LUT_CLIP_ABOVE, bool initZero = false) @@ -256,7 +252,7 @@ public: return size > 0 ?
upperBound : 0; } - LUT & operator=(const LUT& rhs) + LUT& operator=(const LUT& rhs) { if (this != &rhs) { if (rhs.size > this->size) { @@ -287,7 +283,7 @@ public: } // handy to sum up per thread histograms. #pragma omp simd speeds up the loop by about factor 3 for LUTu (uint32_t). - LUT & operator+=(const LUT& rhs) + LUT& operator+=(const LUT& rhs) { if (rhs.size == this->size) { #ifdef _OPENMP @@ -304,7 +300,7 @@ public: // multiply all elements of LUT with a constant float value template::value>::type> - LUT & operator*=(float factor) + LUT& operator*=(float factor) { #ifdef _OPENMP #pragma omp simd @@ -319,7 +315,7 @@ public: // divide all elements of LUT by a constant float value template::value>::type> - LUT & operator/=(float divisor) + LUT& operator/=(float divisor) { #ifdef _OPENMP #pragma omp simd @@ -489,7 +485,7 @@ public: // Return the value for "index" that is in the [0-1] range. template::value>::type> - T getVal01 (float index) const + T getVal01(float index) const { index *= (float)upperBound; int idx = (int)index; // don't use floor! 
The difference in negative space is no problems here @@ -514,19 +510,19 @@ public: return (p1 + p2 * diff); } - operator bool (void) const + operator bool() const // FIXME: Should be explicit { return size > 0; } - void clear(void) + void clear() { if (data && size) { memset(data, 0, size * sizeof(T)); } } - void reset(void) + void reset() { if (data) { delete[] data; diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index 74be48e09..11e2dcad7 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -57,6 +57,7 @@ namespace { + constexpr int limscope = 80; constexpr int mSPsharp = 39; //minimum size Spot Sharp due to buildblendmask constexpr int mSPwav = 32; //minimum size Spot Wavelet @@ -68,14 +69,16 @@ constexpr int TS = 64; // Tile size constexpr float epsilonw = 0.001f / (TS * TS); //tolerance constexpr int offset = 25; // shift between tiles -std::unique_ptr buildMeaLut(const float inVals[11], const float mea[10], float &lutFactor) { - +std::unique_ptr buildMeaLut(const float inVals[11], const float mea[10], float& lutFactor) +{ constexpr int lutSize = 100; - const float lutMax = ceil(mea[9]); + + const float lutMax = std::ceil(mea[9]); const float lutDiff = lutMax / lutSize; + std::vector lutVals(lutSize); int jStart = 1; - for (int i = 0; i < 100; ++i) { + for (int i = 0; i < lutSize; ++i) { const float val = i * lutDiff; if (val < mea[0]) { // still < first value => no interpolation @@ -87,35 +90,38 @@ std::unique_ptr buildMeaLut(const float inVals[11], const float mea[10], f lutVals[i] = inVals[j]; ++jStart; break; - } else if (val < mea[j]) { + } + if (val < mea[j]) { // interpolate const float dist = (val - mea[j - 1]) / (mea[j] - mea[j - 1]); lutVals[i] = rtengine::intp(dist, inVals[j], inVals[j - 1]); break; - } else { - lutVals[i] = inVals[10]; } + lutVals[i] = inVals[10]; } } } lutFactor = 1.f / lutDiff; return std::unique_ptr(new LUTf(lutVals)); - } -constexpr float clipLoc(float x) { +constexpr float clipLoc(float x) +{ 
return rtengine::LIM(x, 0.f, 32767.f); } -constexpr float clipDE(float x) { +constexpr float clipDE(float x) +{ return rtengine::LIM(x, 0.3f, 1.f); } -constexpr float clipC(float x) { +constexpr float clipC(float x) +{ return rtengine::LIM(x, -42000.f, 42000.f); } -constexpr float clipChro(float x) { +constexpr float clipChro(float x) +{ return rtengine::LIM(x, 0.f, 140.f); }