From e269b38d1e1c37603a449101b9e3832d14a3b0bc Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 6 Jul 2020 15:25:04 +0200 Subject: [PATCH] Wavelet Blur levels: use LUT for pyramid, reduce memory usage, speedup --- rtengine/improcfun.h | 2 +- rtengine/iplocallab.cc | 36 ----------- rtengine/ipwavelet.cc | 135 +++++++++++++++++++---------------------- 3 files changed, 62 insertions(+), 111 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 66ad4dc57..672935dbd 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -387,7 +387,7 @@ public: void Eval2(const float* const* WavCoeffs_L, int level, int W_L, int H_L, float *mean, float *meanN, float *sigma, float *sigmaN, float *MaxP, float *MaxN, int numThreads); void calceffect(int level, float *mean, float *sigma, float *mea, float effect, float offs); - + std::unique_ptr<LUTf> buildMeaLut(const float inVals[11], const float mea[10], float& lutFactor); void Aver(const float* HH_Coeffs, int datalen, float &averagePlus, float &averageNeg, float &max, float &min, int numThreads); void Sigma(const float* HH_Coeffs, int datalen, float averagePlus, float averageNeg, float &sigmaPlus, float &sigmaNeg, int numThreads); void calckoe(const float* const* WavCoeffs_LL, float gradw, float tloww, float ** koeLi, int level, int dir, int W_L, int H_L, float edd, float &maxkoeLi, float **tmC = nullptr); diff --git a/rtengine/iplocallab.cc b/rtengine/iplocallab.cc index e138444a2..d24827410 100644 --- a/rtengine/iplocallab.cc +++ b/rtengine/iplocallab.cc @@ -69,42 +69,6 @@ constexpr int TS = 64; // Tile size constexpr float epsilonw = 0.001f / (TS * TS); //tolerance constexpr int offset = 25; // shift between tiles -std::unique_ptr<LUTf> buildMeaLut(const float inVals[11], const float mea[10], float& lutFactor) -{ - constexpr int lutSize = 100; - - const float lutMax = std::ceil(mea[9]); - const float lutDiff = lutMax / lutSize; - - std::vector<float> lutVals(lutSize); - int jStart = 1; - for (int i = 0; i < lutSize; 
++i) { - const float val = i * lutDiff; - if (val < mea[0]) { - // still < first value => no interpolation - lutVals[i] = inVals[0]; - } else { - for (int j = jStart; j < 10; ++j) { - if (val == mea[j]) { - // exact match => no interpolation - lutVals[i] = inVals[j]; - ++jStart; - break; - } - if (val < mea[j]) { - // interpolate - const float dist = (val - mea[j - 1]) / (mea[j] - mea[j - 1]); - lutVals[i] = rtengine::intp(dist, inVals[j], inVals[j - 1]); - break; - } - lutVals[i] = inVals[10]; - } - } - } - lutFactor = 1.f / lutDiff; - return std::unique_ptr<LUTf>(new LUTf(lutVals)); -} - constexpr float clipLoc(float x) { return rtengine::LIM(x, 0.f, 32767.f); diff --git a/rtengine/ipwavelet.cc b/rtengine/ipwavelet.cc index 949dbafbd..98b0bf208 100644 --- a/rtengine/ipwavelet.cc +++ b/rtengine/ipwavelet.cc @@ -50,6 +50,8 @@ #endif #include "cplx_wavelet_dec.h" +#define BENCHMARK +#include "StopWatch.h" namespace rtengine { @@ -162,6 +164,41 @@ struct cont_params { int wavNestedLevels = 1; +std::unique_ptr<LUTf> ImProcFunctions::buildMeaLut(const float inVals[11], const float mea[10], float& lutFactor) +{ + constexpr int lutSize = 100; + + const float lutMax = std::ceil(mea[9]); + const float lutDiff = lutMax / lutSize; + + std::vector<float> lutVals(lutSize); + int jStart = 1; + for (int i = 0; i < lutSize; ++i) { + const float val = i * lutDiff; + if (val < mea[0]) { + // still < first value => no interpolation + lutVals[i] = inVals[0]; + } else { + for (int j = jStart; j < 10; ++j) { + if (val == mea[j]) { + // exact match => no interpolation + lutVals[i] = inVals[j]; + ++jStart; + break; + } + if (val < mea[j]) { + // interpolate + const float dist = (val - mea[j - 1]) / (mea[j] - mea[j - 1]); + lutVals[i] = rtengine::intp(dist, inVals[j], inVals[j - 1]); + break; + } + lutVals[i] = inVals[10]; + } + } + } + lutFactor = 1.f / lutDiff; + return std::unique_ptr<LUTf>(new LUTf(lutVals)); +} void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const 
procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const Wavblcurve & wavblcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveSH & waOpacityCurveSH, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, const LUTf &wavclCurve, int skip) @@ -1974,6 +2011,7 @@ void ImProcFunctions::WaveletcontAllLfinal(wavelet_decomposition& WaveletCoeffs_ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float **varchrom, wavelet_decomposition& WaveletCoeffs_L, const Wavblcurve & wavblcurve, struct cont_params &cp, int skip, float *mean, float *sigma, float *MaxP, float *MaxN, const WavCurve & wavCLVCcurve, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveSH & waOpacityCurveSH, FlatCurve* ChCurve, bool Chutili) { + BENCHFUN const int maxlvl = WaveletCoeffs_L.maxlevel(); const int W_L = WaveletCoeffs_L.level_W(0); const int H_L = WaveletCoeffs_L.level_H(0); @@ -2129,10 +2167,6 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * } } -// - int n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n32; - n0 = n1 = n2 = n3 = n4 = n5 = n6 = n7 = n8 = n9 = n10 = n32 = 0; - float *koeLi[12]; const std::unique_ptr<float[]> koeLibuffer(new float[12 * H_L * W_L]()); @@ -2281,13 +2315,14 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * for (int i = 0; i < 500; i++) { if (wavblcurve[i] != 0.) 
{ wavcurvecomp = true; + break; } } } + std::unique_ptr<float[]> aft; #ifdef _OPENMP - // #pragma omp for schedule(dynamic) collapse(2) - #pragma omp for reduction(+:n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n32) schedule(dynamic) collapse(2) + #pragma omp for schedule(dynamic) collapse(2) #endif for (int dir = 1; dir < 4; dir++) { @@ -2299,85 +2334,37 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float * float* const* WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl); ContAllL(koeLi, maxkoeLi[lvl * 3 + dir - 1], true, maxlvl, labco, varhue, varchrom, WavCoeffs_L, WavCoeffs_L0, lvl, dir, cp, Wlvl_L, Hlvl_L, skip, mean, sigma, MaxP, MaxN, wavCLVCcurve, waOpacityCurveW, waOpacityCurveSH, ChCurve, Chutili); - int minWL = min(Wlvl_L, Hlvl_L); - if(minWL > 180) { + if (std::min(Wlvl_L, Hlvl_L) > 180) { if (wavblcurve && wavcurvecomp && cp.blena) { // printf("Blur level L\n"); float mea[10]; const float effect = cp.bluwav; constexpr float offs = 1.f; - float * beta = new float[Wlvl_L * Hlvl_L]; - calceffect(lvl, mean, sigma, mea, effect, offs); - - float * bef = new float[Wlvl_L * Hlvl_L]; - float * aft = new float[Wlvl_L * Hlvl_L]; - - for (int co = 0; co < Hlvl_L * Wlvl_L; co++) { - bef[co] = WavCoeffs_L[dir][co]; - float WavCL = std::fabs(WavCoeffs_L[dir][co]); - - if (WavCL < mea[0]) { - beta[co] = 0.05f; - n0++; - - if (WavCL < 32.7) { - n32++; - } - } else if (WavCL < mea[1]) { - beta[co] = 0.2f; - n1++; - } else if (WavCL < mea[2]) { - beta[co] = 0.7f; - n2++; - } else if (WavCL < mea[3]) { - beta[co] = 1.f; //standard - n3++; - } else if (WavCL < mea[4]) { - beta[co] = 1.f; - n4++; - } else if (WavCL < mea[5]) { - beta[co] = 0.8f; //+sigma - n5++; - } else if (WavCL < mea[6]) { - beta[co] = 0.6f; - n6++; - } else if (WavCL < mea[7]) { - beta[co] = 0.4f; - n7++; - } else if (WavCL < mea[8]) { - beta[co] = 0.2f; // + 2 sigma - n8++; - } else if (WavCL < mea[9]) { - beta[co] = 0.1f; - n9++; - } else { - beta[co] = 0.01f; - n10++; - } - - + 
float lutFactor; + const float inVals[] = {0.05f, 0.2f, 0.7f, 1.f, 1.f, 0.8f, 0.6f, 0.4f, 0.2f, 0.1f, 0.01f}; + const auto meaLut = buildMeaLut(inVals, mea, lutFactor); + if (!aft.get()) { + aft.reset(new float[Wlvl_L * Hlvl_L]); } - if (settings->verbose) { - printf("lvl=%i n0=%i n32=%i n1=%i n2=%i n3=%i n4=%i n5=%i n6=%i n7=%i n8=%i n9=%i n10=%i\n", lvl, n0, n0 - n32, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10); + //blur level + const float klev = wavblcurve[lvl * 55.5f] * 80.f / skip; + auto WavL = WavCoeffs_L[dir]; + boxblur(WavL, aft.get(), klev, Wlvl_L, Hlvl_L, false); + + int co = 0; +#ifdef __SSE2__ + const vfloat lutFactorv = F2V(lutFactor); + for (; co < Hlvl_L * Wlvl_L - 3; co += 4) { + const vfloat valv = LVFU(WavL[co]); + STVFU(WavL[co], intp((*meaLut)[vabsf(valv) * lutFactorv], LVFU(aft[co]), valv)); } - - float klev = (wavblcurve[lvl * 55.5f]); - - //blur level - klev *= 80.f / skip; - boxblur(bef, aft, klev, Wlvl_L, Hlvl_L, false); - - for (int co = 0; co < Hlvl_L * Wlvl_L; co++) { - aft[co] = bef[co] * (1.f - beta[co]) + aft[co] * beta[co]; - WavCoeffs_L[dir][co] = aft[co]; +#endif + for (; co < Hlvl_L * Wlvl_L; co++) { + WavL[co] = intp((*meaLut)[std::fabs(WavL[co]) * lutFactor], aft[co], WavL[co]); } - - delete[] bef; - delete[] aft; - delete[] beta; } } }