From ef66cb2e7f4eb0a419abecebc1befde844f86b62 Mon Sep 17 00:00:00 2001 From: Ingo Date: Thu, 9 Oct 2014 01:21:52 +0200 Subject: [PATCH] Increased precision for Shadows/Highlights (and a small speedup and reduced memory consumption for method 'Sharp mask'), Issue 2523 --- rtengine/dcrop.cc | 8 +- rtengine/improccoordinator.cc | 9 +- rtengine/shmap.cc | 184 ++++++++++++++++++---------------- rtengine/shmap.h | 20 ++-- 4 files changed, 120 insertions(+), 101 deletions(-) diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index 046539555..18e0d9757 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -204,6 +204,8 @@ void Crop::update (int todo) { double radius = sqrt (double(SKIPS(parent->fw,skip)*SKIPS(parent->fw,skip)+SKIPS(parent->fh,skip)*SKIPS(parent->fh,skip))) / 2.0; double shradius = params.sh.radius; if (!params.sh.hq) shradius *= radius / 1800.0; + if(!cshmap) + cshmap = new SHMap (cropw, croph, true); cshmap->update (baseCrop, shradius, parent->ipf.lumimul, params.sh.hq, skip); if(parent->shmap->min_f < 65535.f) // don't call forceStat with wrong values cshmap->forceStat (parent->shmap->max_f, parent->shmap->min_f, parent->shmap->avg); @@ -525,12 +527,14 @@ if (settings->verbose) printf ("setcropsizes before lock\n"); if (cbuffer ) delete [] cbuffer; if (cbuf_real) delete [] cbuf_real; - if (cshmap ) delete cshmap; + if (cshmap ) { delete cshmap; cshmap = NULL;} cbuffer = new float*[croph]; cbuf_real= new float[(croph+2)*cropw]; for (int i=0; iupdate (oprevi, shradius, ipf.lumimul, params.sh.hq, scale); } readyphase++; @@ -688,7 +690,8 @@ void ImProcCoordinator::freeAll () { delete previmg; delete workimg; - delete shmap; + if(shmap) + delete shmap; shmap = NULL; } allocated = false; @@ -733,7 +736,9 @@ if (settings->verbose) printf ("setscale before lock\n"); //ncie is only used in ImProcCoordinator::updatePreviewImage, it will be allocated on first use and deleted if not used anymore previmg = new Image8 (pW, pH); workimg = new Image8 (pW, pH); - shmap = new SHMap (pW, pH, true); + if(params.sh.enabled) { + shmap = new SHMap (pW, pH, true); + } allocated = true; } diff --git a/rtengine/shmap.cc b/rtengine/shmap.cc index 17c00eaae..951abaca9 100644 --- a/rtengine/shmap.cc +++ b/rtengine/shmap.cc @@ -21,11 +21,8 @@ #include "rtengine.h" #include "rt_math.h" #include "rawimagesource.h" -#include "sleef.c" #undef THREAD_PRIORITY_NORMAL -#ifdef __SSE2__ -#include "sleefsseavx.c" -#endif // __SSE2__ +#include "opthelper.h" namespace rtengine { @@ -36,6 +33,7 @@ SHMap::SHMap (int w, int h, bool multiThread) : W(w), H(h), multiThread(multiThr map = new float*[H]; for (int i=0; ir(i,j),0.f) + lumi[1]*std::max(img->g(i,j),0.f) + lumi[2]*std::max(img->b(i,j),0.f); + luminance[i][j] = lumi[0]*std::max(img->r(i,j),0.f) + lumi[1]*std::max(img->g(i,j),0.f) + lumi[2]*std::max(img->b(i,j),0.f); } + +} + +void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int skip) { if (!hq) { + fillLuminance( img, map, lumi); + #ifdef _OPENMP #pragma omp parallel #endif @@ -72,43 +76,64 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //experimental dirpyr shmap - float thresh = 100*radius;//1000; - LUTf rangefn(0x10000); - float ** dirpyrlo[2]; + float thresh = (100.f*radius);//1000; - int intfactor = 1024;//16384; - - //set up range functions - for (int i=0; i<0x10000; i++) { - //rangefn[i] = (int)(((thresh)/((double)(i) + (thresh)))*intfactor); - rangefn[i] = static_cast(xexpf(-(min(10.0f,(static_cast(i)*i) / (thresh*thresh))))*intfactor); - //if (rangefn[i]<0 || rangefn[i]>intfactor) - //printf("i=%d rangefn=%d arg=%f \n",i,rangefn[i], float(i*i) / (thresh*thresh)); + // set up range function + // calculate size of Lookup table. That's possible because from a value k for all i>=k rangefn[i] will be exp(-10) + // So we use this fact and the automatic clip of lut to reduce the size of lut and the number of calculations to fill the lut + // In past this lut had only integer precision with rangefn[i] = 0 for all i>=k + // We set the last element to a small epsilon 1e-15 instead of zero to avoid divisions by zero + const int lutSize = thresh * sqrtf(10.f) + 1; + thresh *= thresh; + LUTf rangefn(lutSize); + for (int i=0; i(i)*i) / thresh ));//*intfactor; + } + rangefn[lutSize-1] = 1e-15f; + + // We need one temporary buffer + float ** buffer = allocArray (W, H); + + // the final result has to be in map + // for an even number of levels that means: map => buffer, buffer => map + // for an odd number of levels that means: buffer => map, map => buffer, buffer => map + // so let's calculate the number of levels first + // There are at least two levels + int numLevels=2; + int scale=2; + while (skip*scale<16) { + scale *= 2; + numLevels++; } - dirpyrlo[0] = allocArray (W, H); - dirpyrlo[1] = allocArray (W, H); + float ** dirpyrlo[2]; + if(numLevels&1) { // odd number of levels, start with buffer + dirpyrlo[0] = buffer; + dirpyrlo[1] = map; + } else { // even number of levels, start with map + dirpyrlo[0] = map; + dirpyrlo[1] = buffer; + } - int scale=1; + fillLuminance( img, dirpyrlo[0], lumi); + + scale = 1; int level=0; int indx=0; - dirpyr_shmap(map, dirpyrlo[indx], W, H, rangefn, 0, scale ); + dirpyr_shmap(dirpyrlo[indx], dirpyrlo[1-indx], W, H, rangefn, level, scale ); scale *= 2; - level += 1; + level ++; indx = 1-indx; while (skip*scale<16) { - dirpyr_shmap(dirpyrlo[1-indx], dirpyrlo[indx], W, H, rangefn, level, scale ); + dirpyr_shmap(dirpyrlo[indx], dirpyrlo[1-indx], W, H, rangefn, level, scale ); scale *= 2; - level += 1; + level ++; indx = 1-indx; } - dirpyr_shmap(dirpyrlo[1-indx], map, W, H, rangefn, level, scale ); - - - freeArray(dirpyrlo[0], H); - freeArray(dirpyrlo[1], H); + dirpyr_shmap(dirpyrlo[indx], dirpyrlo[1-indx], W, H, rangefn, level, scale ); + freeArray(buffer, H); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% /* @@ -126,8 +151,7 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int } // update average, minimum, maximum - - float _avg = 0.0f; + double _avg = 0.0f; // use double precision to gain precision especially at systems with few cores and big pictures (error for 36 MPixel on single core was about 8% with float) min_f = 65535; max_f = 0; #ifdef _OPENMP @@ -138,7 +162,7 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int float _max_f = 0.0f; float _val; #ifdef _OPENMP -#pragma omp for reduction(+:_avg) nowait +#pragma omp for reduction(+:_avg) schedule(dynamic,16) nowait #endif for (int i=0; i