From 8728aee0cee59984c41ed53a5e2f70fd105d5e3f Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Sat, 4 Jul 2020 11:35:37 +0200 Subject: [PATCH] fix broken non SSE build and add SSE code for one loop in wavcont() --- rtengine/LUT.h | 2 +- rtengine/iplocallab.cc | 44 ++++++++++++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/rtengine/LUT.h b/rtengine/LUT.h index c426bbaaa..416ae689a 100644 --- a/rtengine/LUT.h +++ b/rtengine/LUT.h @@ -99,8 +99,8 @@ protected: unsigned int size; unsigned int upperBound; // always equals size-1, parameter created for performance reason private: -#ifdef __SSE2__ unsigned int owner; +#ifdef __SSE2__ alignas(16) vfloat maxsv; alignas(16) vfloat sizev; alignas(16) vint sizeiv; diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index 11e2dcad7..689054a5d 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -7184,16 +7184,6 @@ void ImProcFunctions::wavcont(const struct local_params& lp, float ** tmp, wavel float lutFactor; const float inVals[] = {0.05f, 0.2f, 0.7f, 1.f, 1.f, 0.8f, 0.65f, 0.5f, 0.4f, 0.25f, 0.1f}; const auto meaLut = buildMeaLut(inVals, mea, lutFactor); - - float klev = (loccomprewavCurve[level * 55.5f] - 0.75f); - if (klev < 0.f) { - klev *= 2.6666f;//compression increase contraste - } else { - klev *= 4.f;//dilatation reduce contraste - detailattenuator - } - const float compression = expf(-klev); - const float detailattenuator = std::max(klev, 0.f); - const auto wav_L = wdspot.level_coeffs(level)[dir]; #ifdef _OPENMP @@ -7206,14 +7196,38 @@ void ImProcFunctions::wavcont(const struct local_params& lp, float ** tmp, wavel } } + float klev = (loccomprewavCurve[level * 55.5f] - 0.75f); + if (klev < 0.f) { + klev *= 2.6666f;//compression increase contraste + } else { + klev *= 4.f;//dilatation reduce contraste - detailattenuator + } + const float compression = expf(-klev); + const float detailattenuator = std::max(klev, 0.f); + Compresslevels(templevel, W_L, H_L, compression, detailattenuator, thres, mean[level], MaxP[level], meanN[level], MaxN[level], madL[level][dir - 1]); #ifdef _OPENMP - #pragma omp parallel for if (multiThread) + #pragma omp parallel if (multiThread) #endif - for (int y = 0; y < H_L; y++) { - for (int x = 0; x < W_L; x++) { - int j = y * W_L + x; - wav_L[j] = intp((*meaLut)[std::fabs(wav_L[j]) * lutFactor], templevel[y][x], wav_L[j]); + { +#ifdef __SSE2__ + const vfloat lutFactorv = F2V(lutFactor); +#endif +#ifdef _OPENMP + #pragma omp for +#endif + for (int y = 0; y < H_L; y++) { + int x = 0; + int j = y * W_L; +#ifdef __SSE2__ + for (; x < W_L - 3; x += 4, j += 4) { + const vfloat valv = LVFU(wav_L[j]); + STVFU(wav_L[j], intp((*meaLut)[vabsf(valv) * lutFactorv], LVFU(templevel[y][x]), valv)); + } +#endif + for (; x < W_L; x++, j++) { + wav_L[j] = intp((*meaLut)[std::fabs(wav_L[j]) * lutFactor], templevel[y][x], wav_L[j]); + } } } }