From 1a191363ce1728b15f0c8324c82342d23226c20d Mon Sep 17 00:00:00 2001 From: heckflosse Date: Mon, 17 Sep 2018 22:46:57 +0200 Subject: [PATCH 1/2] review ImProcFunctions::CompressDR(), fixes #4815 --- rtengine/improcfun.h | 3 +- rtengine/ipwavelet.cc | 99 ++++++------------------------------------- 2 files changed, 13 insertions(+), 89 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index c857d3d05..a97ecef40 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -255,9 +255,8 @@ public: void EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterates, int skip, struct cont_params& cp, int W_L, int H_L, float max0, float min0); - float *CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed); + void CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost); void ContrastResid(float * WavCoeffs_L0, struct cont_params &cp, int W_L, int H_L, float max0, float min0); - float *ContrastDR(float *Source, int W_L, int H_L, float *Contrast = nullptr); void EPDToneMap(LabImage *lab, unsigned int Iterates = 0, int skip = 1); void EPDToneMapCIE(CieImage *ncie, float a_w, float c_, int Wid, int Hei, float minQ, float maxQ, unsigned int Iterates = 0, int skip = 1); diff --git a/rtengine/ipwavelet.cc b/rtengine/ipwavelet.cc index def469b70..35a9d0f16 100644 --- a/rtengine/ipwavelet.cc +++ b/rtengine/ipwavelet.cc @@ -1425,67 +1425,11 @@ void ImProcFunctions::Eval2 (float ** WavCoeffs_L, int level, MaxN[level] = maxLN; } -float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contrast) -{ - int n = W_L * H_L; - - if(Contrast == nullptr) { - Contrast = new float[n]; - } - - memcpy(Contrast, Source, n * sizeof(float)); -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for (int i = 0; i < W_L * H_L; i++) { //contrast - Contrast[i] = Source[i] ; - } - - return Contrast; -} - -float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed) +void ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost) { - const float eps = 0.000001f; - int n = W_L * H_L; - -#ifdef __SSE2__ -#ifdef _OPENMP - #pragma omp parallel -#endif - { - __m128 epsv = _mm_set1_ps( eps ); -#ifdef _OPENMP - #pragma omp for -#endif - - for(int ii = 0; ii < n - 3; ii += 4) { - _mm_storeu_ps( &Source[ii], xlogf(LVFU(Source[ii]) + epsv)); - } - } - - for(int ii = n - (n % 4); ii < n; ii++) { - Source[ii] = xlogf(Source[ii] + eps); - } - -#else -#ifdef _OPENMP - #pragma omp parallel for -#endif - - for(int ii = 0; ii < n; ii++) { - Source[ii] = xlogf(Source[ii] + eps); - } - -#endif - - float *ucr = ContrastDR(Source, W_L, H_L); - - if(Compressed == nullptr) { - Compressed = ucr; - } + constexpr float eps = 0.000001f; + const int n = W_L * H_L; float temp; @@ -1501,39 +1445,30 @@ float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compre float betemp = expf(-(2.f - DetailBoost + 0.694f)) - 1.f; //0.694 = log(2) temp = 1.2f * xlogf( -betemp); temp /= (-2.f * DetailBoost + 5.5f); - } - - else { + } else { temp = (Compression - 1.0f) / 20.f; } + temp += 1.f; + #ifdef __SSE2__ #ifdef _OPENMP #pragma omp parallel #endif { - __m128 cev, uev, sourcev; - __m128 epsv = _mm_set1_ps( eps ); - __m128 DetailBoostv = _mm_set1_ps( DetailBoost ); - __m128 tempv = _mm_set1_ps( temp ); + vfloat epsv = F2V(eps); + vfloat tempv = F2V(temp); #ifdef _OPENMP #pragma omp for #endif for(int i = 0; i < n - 3; i += 4) { - cev = xexpf(LVFU(Source[i]) + LVFU(ucr[i]) * (tempv)) - epsv; - uev = xexpf(LVFU(ucr[i])) - epsv; - sourcev = xexpf(LVFU(Source[i])) - epsv; - _mm_storeu_ps( &Source[i], sourcev); - _mm_storeu_ps( &Compressed[i], cev + DetailBoostv * (sourcev - uev) ); + STVFU(Source[i], xexpf(xlogf(LVFU(Source[i]) + epsv) * tempv) - epsv); } } for(int i = n - (n % 4); i < n; i++) { - float ce = xexpf(Source[i] + ucr[i] * (temp)) - eps; - float ue = xexpf(ucr[i]) - eps; - Source[i] = xexpf(Source[i]) - eps; - Compressed[i] = ce + DetailBoost * (Source[i] - ue); + Source[i] = xexpf(xlogf(Source[i] + eps) * temp) - eps; } #else @@ -1542,21 +1477,11 @@ float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compre #endif for(int i = 0; i < n; i++) { - float ce = xexpf(Source[i] + ucr[i] * (temp)) - eps; - float ue = xexpf(ucr[i]) - eps; - Source[i] = xexpf(Source[i]) - eps; - Compressed[i] = ce + DetailBoost * (Source[i] - ue); + Source[i] = xexpf(xlogf(Source[i] + eps) * temp) - eps; } #endif - if(Compressed != ucr) { - delete[] ucr; - } - - return Compressed; - - } void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp, int W_L, int H_L, float max0, float min0) @@ -1589,7 +1514,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp } - CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost, WavCoeffs_L0); + CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost); #ifdef _OPENMP From 2bea84cfa80b22ce06715961697d01873af911e5 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Mon, 17 Sep 2018 23:46:36 +0200 Subject: [PATCH 2/2] ImProcFunctions::CompressDR(): eliminate eps, #4815 --- rtengine/ipwavelet.cc | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/rtengine/ipwavelet.cc b/rtengine/ipwavelet.cc index 35a9d0f16..e4a61caa0 100644 --- a/rtengine/ipwavelet.cc +++ b/rtengine/ipwavelet.cc @@ -1427,48 +1427,46 @@ void ImProcFunctions::Eval2 (float ** WavCoeffs_L, int level, void ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost) { - - constexpr float eps = 0.000001f; const int n = W_L * H_L; - float temp; + float exponent; if(DetailBoost > 0.f && DetailBoost < 0.05f ) { float betemp = expf(-(2.f - DetailBoost + 0.694f)) - 1.f; //0.694 = log(2) - temp = 1.2f * xlogf( -betemp); - temp /= 20.f; + exponent = 1.2f * xlogf( -betemp); + exponent /= 20.f; } else if(DetailBoost >= 0.05f && DetailBoost < 0.25f ) { float betemp = expf(-(2.f - DetailBoost + 0.694f)) - 1.f; //0.694 = log(2) - temp = 1.2f * xlogf( -betemp); - temp /= (-75.f * DetailBoost + 23.75f); + exponent = 1.2f * xlogf( -betemp); + exponent /= (-75.f * DetailBoost + 23.75f); } else if(DetailBoost >= 0.25f) { float betemp = expf(-(2.f - DetailBoost + 0.694f)) - 1.f; //0.694 = log(2) - temp = 1.2f * xlogf( -betemp); - temp /= (-2.f * DetailBoost + 5.5f); + exponent = 1.2f * xlogf( -betemp); + exponent /= (-2.f * DetailBoost + 5.5f); } else { - temp = (Compression - 1.0f) / 20.f; + exponent = (Compression - 1.0f) / 20.f; } - temp += 1.f; + exponent += 1.f; + // now calculate Source = pow(Source, exponent) #ifdef __SSE2__ #ifdef _OPENMP #pragma omp parallel #endif { - vfloat epsv = F2V(eps); - vfloat tempv = F2V(temp); + vfloat exponentv = F2V(exponent); #ifdef _OPENMP #pragma omp for #endif for(int i = 0; i < n - 3; i += 4) { - STVFU(Source[i], xexpf(xlogf(LVFU(Source[i]) + epsv) * tempv) - epsv); + STVFU(Source[i], xexpf(xlogf(LVFU(Source[i])) * exponentv)); } } for(int i = n - (n % 4); i < n; i++) { - Source[i] = xexpf(xlogf(Source[i] + eps) * temp) - eps; + Source[i] = xexpf(xlogf(Source[i]) * exponent); } #else @@ -1477,7 +1475,7 @@ void ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compress #endif for(int i = 0; i < n; i++) { - Source[i] = xexpf(xlogf(Source[i] + eps) * temp) - eps; + Source[i] = xexpf(xlogf(Source[i]) * exponent); } #endif