diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index f1d331acd..0b4b90a31 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -25,7 +25,6 @@ #include #include #include "../rtgui/threadutils.h" - #include "rtengine.h" #include "improcfun.h" #include "LUT.h" @@ -36,19 +35,12 @@ #include "mytime.h" #include "sleef.c" #include "opthelper.h" +#include "cplx_wavelet_dec.h" #ifdef _OPENMP #include #endif -#include "cplx_wavelet_dec.h" - -//#define MIN(a,b) ((a) < (b) ? (a) : (b)) -//#define MAX(a,b) ((a) > (b) ? (a) : (b)) -//#define LIM(x,min,max) MAX(min,MIN(x,max)) -//#define ULIM(x,y,z) ((y) < (z) ? LIM(x,y,z) : LIM(x,z,y)) -//#define CLIP(x) LIM(x,0,65535) - #define TS 64 // Tile size #define offset 25 // shift between tiles #define fTS ((TS/2+1)) // second dimension of Fourier tiles @@ -139,11 +131,10 @@ extern const Settings* settings; // Median calculation using quicksort float fq_sort2(float arr[], int n) { - int low, high ; - int median; - int middle, ll, hh; + int low = 0; + int high = n-1; + int median = (low + high) / 2; - low = 0 ; high = n-1 ; median = (low + high) / 2; for (;;) { if (high <= low) return arr[median] ; @@ -153,49 +144,47 @@ float fq_sort2(float arr[], int n) return arr[median] ; } - middle = (low + high) / 2; - if (arr[middle] > arr[high]) ELEM_FLOAT_SWAP(arr[middle], arr[high]) ; - if (arr[low] > arr[high]) ELEM_FLOAT_SWAP(arr[low], arr[high]) ; - if (arr[middle] > arr[low]) ELEM_FLOAT_SWAP(arr[middle], arr[low]) ; + int middle = (low + high) / 2; + if (arr[middle] > arr[high]) ELEM_FLOAT_SWAP(arr[middle], arr[high]) ; + if (arr[low] > arr[high]) ELEM_FLOAT_SWAP(arr[low], arr[high]) ; + if (arr[middle] > arr[low]) ELEM_FLOAT_SWAP(arr[middle], arr[low]) ; - ELEM_FLOAT_SWAP(arr[middle], arr[low+1]) ; - ll = low + 1; - hh = high; + ELEM_FLOAT_SWAP(arr[middle], arr[low+1]) ; + int ll = low + 1; + int hh = high; - for (;;) { - do ll++; while (arr[low] > arr[ll]) ; - do hh--; while (arr[hh] > arr[low]) ; + for (;;) { + do ll++; while (arr[low] > arr[ll]) ; + do hh--; while (arr[hh] > arr[low]) ; - if (hh < ll) - break; + if (hh < ll) + break; - ELEM_FLOAT_SWAP(arr[ll], arr[hh]) ; - } + ELEM_FLOAT_SWAP(arr[ll], arr[hh]) ; + } - ELEM_FLOAT_SWAP(arr[low], arr[hh]) ; + ELEM_FLOAT_SWAP(arr[low], arr[hh]) ; - if (hh <= median) - low = ll; - if (hh >= median) - high = hh - 1; + if (hh <= median) + low = ll; + if (hh >= median) + high = hh - 1; } } float media(float *elements, int N) { - int i,j,min; - float temp; // Order elements (only half of them) - for (i = 0; i < (N >> 1) + 1; ++i) + for (int i = 0; i < (N >> 1) + 1; ++i) { // Find position of minimum element - min = i; - for (j = i + 1; j < N; ++j) + int min = i; + for (int j = i + 1; j < N; ++j) if (elements[j] < elements[min]) min = j; // Put found minimum element in its place - temp = elements[i]; + float temp = elements[i]; elements[i] = elements[min]; elements[min] = temp; } @@ -242,7 +231,7 @@ void ImProcFunctions::Tile_calc (int tilesize, int overlap, int kall, int imwidt } int denoiseNestedLevels = 1; - +enum nrquality {QUALITY_STANDARD, QUALITY_HIGH}; SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst,Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &nresi, float &highresi) { @@ -250,7 +239,6 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef MyTime t1e,t2e; t1e.set(); //#endif - if (dnparams.luma==0 && dnparams.chroma==0 && !dnparams.median && !noiseLCurve && !noiseCCurve) { //nothing to do; copy src to dst or do nothing in case src == dst if(src != dst) @@ -261,85 +249,107 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef } return; - } + } + static MyMutex FftwMutex; MyMutex::MyLock lock(FftwMutex); - int hei,wid; + + const nrquality nrQuality = (dnparams.smethod == "shal") ? QUALITY_STANDARD : QUALITY_HIGH;//shrink method + const float qhighFactor = (nrQuality == QUALITY_HIGH) ? 1.f/(float) settings->nrhigh : 1.0f; + const bool useNoiseCCurve = (noiseCCurve && noiseCCurve.getSum() > 5.f ); + const bool useNoiseLCurve = (noiseLCurve && noiseLCurve.getSum() >= 7.f ); + + int hei; float** lumcalc; - float** acalc; - float** bcalc; - if(noiseLCurve || noiseCCurve) { + float** ccalc; + + bool ponder=false; + float ponderCC=1.f; + if(settings->leveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PON") {ponder=true;ponderCC=0.5f;} + if(settings->leveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PRE") {ponderCC=0.5f;} + if(settings->leveldnautsimpl==0 && params->dirpyrDenoise.Cmethod=="PREV") {ponderCC=0.5f;} + + const bool denoiseMethodRgb = (dnparams.dmethod == "RGB"); + + // init luma noisevarL + const float noiseluma=(float) dnparams.luma; + const float noisevarL = (useNoiseLCurve && (denoiseMethodRgb || !isRAW)) ? (float) (SQR(((noiseluma+1.0)/125.0)*(10.+ (noiseluma+1.0)/25.0))) : (float) (SQR((noiseluma/125.0)*(1.0+ noiseluma/25.0))); + + if(useNoiseLCurve || useNoiseCCurve) { hei=calclum->height; - wid=calclum->width; + int wid=calclum->width; TMatrix wprofi = iccStore->workingSpaceMatrix (params->icm.working); - double wpi[3][3] = { - {wprofi[0][0],wprofi[0][1],wprofi[0][2]}, - {wprofi[1][0],wprofi[1][1],wprofi[1][2]}, - {wprofi[2][0],wprofi[2][1],wprofi[2][2]} + const float wpi[3][3] = { + {static_cast(wprofi[0][0]),static_cast(wprofi[0][1]),static_cast(wprofi[0][2])}, + {static_cast(wprofi[1][0]),static_cast(wprofi[1][1]),static_cast(wprofi[1][2])}, + {static_cast(wprofi[2][0]),static_cast(wprofi[2][1]),static_cast(wprofi[2][2])} }; - lumcalc = new float*[(hei+1)/2]; - for (int i=0; i<(hei+1)/2; i++) - lumcalc[i] = new float[(wid+1)/2]; - acalc = new float*[(hei+1)/2]; - for (int i=0; i<(hei+1)/2; i++) - acalc[i] = new float[(wid+1)/2]; - bcalc = new float*[(hei+1)/2]; - for (int i=0; i<(hei+1)/2; i++) - bcalc[i] = new float[(wid+1)/2]; + lumcalc = new float*[(hei)]; + for (int i=0; i<(hei); i++) + lumcalc[i] = new float[(wid)]; + ccalc = new float*[(hei)]; + for (int i=0; i<(hei); i++) + ccalc[i] = new float[(wid)]; + + float cn100Precalc; + if(useNoiseCCurve) + cn100Precalc = SQR(1.f + ponderCC*(4.f*noiseCCurve[100.f / 60.f])); #ifdef _OPENMP #pragma omp parallel for #endif - for(int ii=0;iir(ii,jj); float GL = calclum->g(ii,jj); float BL = calclum->b(ii,jj); - // determine luminance for noisecurve + // determine luminance and chrominance for noisecurves float XL,YL,ZL; Color::rgbxyz(RL,GL,BL,XL,YL,ZL,wpi); - Color::XYZ2Lab(XL, YL, ZL, LLum, AAum, BBum); - lumcalc[ii>>1][jj>>1]=LLum; - acalc[ii>>1][jj>>1]=AAum; - bcalc[ii>>1][jj>>1]=BBum; + Color::XYZ2Lab(XL, YL, ZL, LLum, AAum, BBum); + if(useNoiseLCurve) { + float epsi = 0.01f; + if(LLum<2.f) + LLum = 2.f;//avoid divided by zero + if(LLum>32768.f) + LLum = 32768.f; // not strictly necessary + float kinterm = epsi + noiseLCurve[xdivf(LLum,15)*500.f]; + kinterm *= 100.f; + kinterm += noiseluma; + lumcalc[ii][jj] = SQR((kinterm/125.f)*(1.f+kinterm/25.f)); + } + if(useNoiseCCurve) { + float cN = sqrtf(SQR(AAum)+SQR(BBum)); + if(cN > 100) + ccalc[ii][jj] = SQR(1.f + ponderCC*(4.f*noiseCCurve[cN / 60.f])); + else + ccalc[ii][jj] = cn100Precalc; + } } - } + } + delete calclum; calclum = NULL; -} - - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - /*if (plistener) { - plistener->setProgressStr ("Denoise..."); - plistener->setProgress (0.0); - }*/ - -// volatile double progress = 0.0; - - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - + } + const short int imheight=src->height, imwidth=src->width; - Qhigh=1.0f; - if(dnparams.smethod=="shalbi") - Qhigh=1.f/(float) settings->nrhigh; + if (dnparams.luma!=0 || dnparams.chroma!=0 || dnparams.methodmed=="Lab" || dnparams.methodmed=="Lonly" ) { - const bool perf = (dnparams.dmethod=="RGB"); // gamma transform for input data float gam = dnparams.gamma; float gamthresh = 0.001f; if(!isRAW) {//reduce gamma under 1 for Lab mode ==> TIF and JPG - if(gam <1.9f) - gam=1.f - (1.9f-gam)/3.f;//minimum gamma 0.7 + if(gam < 1.9f) + gam = 1.f - (1.9f-gam)/3.f;//minimum gamma 0.7 else if (gam >= 1.9f && gam <= 3.f) - gam=(1.4f/1.1f)*gam - 1.41818f; + gam = (1.4f/1.1f)*gam - 1.41818f; } float gamslope = exp(log((double)gamthresh)/gam)/gamthresh; LUTf gamcurve(65536,LUT_CLIP_BELOW); - if(perf) { + if(denoiseMethodRgb) { for (int i=0; i<65536; i++) { gamcurve[i] = (Color::gamma((double)i/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)) * 32768.0f; } @@ -355,7 +365,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef float igamslope = 1.f/gamslope; LUTf igamcurve(65536,LUT_CLIP_BELOW); - if(perf) { + if(denoiseMethodRgb) { for (int i=0; i<65536; i++) { igamcurve[i] = (Color::gamma((float)i/32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); } @@ -366,43 +376,58 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef } const float gain = pow (2.0f, float(expcomp)); - float incr=1.f; - float noisevar_Ldetail = SQR((float)(SQR(100.-dnparams.Ldetail) + 50.*(100.-dnparams.Ldetail)) * TS * 0.5f * incr); - bool enhance_denoise = dnparams.enhance; - int gamlab = settings->denoiselabgamma;//gamma lab essentialy for Luminance detail - if(gamlab > 2) - gamlab=2; + float noisevar_Ldetail = SQR((float)(SQR(100.-dnparams.Ldetail) + 50.*(100.-dnparams.Ldetail)) * TS * 0.5f); + if(settings->verbose) - printf("Denoise Lab=%i\n",gamlab); - + printf("Denoise Lab=%i\n",settings->denoiselabgamma); + + // To avoid branches in loops we access the gammatabs by pointers + // modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 + // we can put other as gamma g=2.6 slope=11, etc. + // but noting to do with real gamma !!!: it's only for data Lab # data RGB + // finally I opted fot gamma55 and with options we can change + + LUTf *denoisegamtab; + LUTf *denoiseigamtab; + switch(settings->denoiselabgamma) { + case 0: denoisegamtab = &(Color::gammatab_26_11); + denoiseigamtab = &(Color::igammatab_26_11); + break; + case 1: denoisegamtab = &(Color::gammatab_4); + denoiseigamtab = &(Color::igammatab_4); + break; + default: denoisegamtab = &(Color::gammatab_55); + denoiseigamtab = &(Color::igammatab_55); + break; + } + array2D tilemask_in(TS,TS); array2D tilemask_out(TS,TS); - const int border = MAX(2,TS/16); - - + const int border = MAX(2,TS/16); #ifdef _OPENMP #pragma omp parallel for #endif - for (int i=0; iTS/2 ? i-TS+1 : i)); - float vmask = (i1TS/2 ? j-TS+1 : j)); - tilemask_in[i][j] = (vmask * (j1TS/2 ? i-TS+1 : i)); + float vmask = (i1TS/2 ? j-TS+1 : j)); + tilemask_in[i][j] = (vmask * (j1data[n] = 0; - + Imagefloat * dsttmp = new Imagefloat(imwidth,imheight); + for (int n=0; n<3*imwidth*imheight; n++) + dsttmp->data[n] = 0; + int tilesize; int overlap; if(settings->leveldnti ==0) { @@ -451,12 +476,12 @@ if(settings->leveldnti ==1) { fftwf_free ( Lbloxtmp ); fftwf_free ( fLbloxtmp ); +#ifndef _OPENMP int numthreads = 1; - -#ifdef _OPENMP +#else // Calculate number of tiles. If less than omp_get_max_threads(), then limit num_threads to number of tiles int numtiles = numtiles_W * numtiles_H; - numthreads = MIN(numtiles,omp_get_max_threads()); + int numthreads = MIN(numtiles,omp_get_max_threads()); if(options.rgbDenoiseThreadLimit > 0) numthreads = MIN(numthreads,options.rgbDenoiseThreadLimit); denoiseNestedLevels = omp_get_max_threads() / numthreads; @@ -483,17 +508,17 @@ if(settings->leveldnti ==1) { TMatrix wiprof = iccStore->workingSpaceInverseMatrix (params->icm.working); //inverse matrix user select const float wip[3][3] = { - {wiprof[0][0],wiprof[0][1],wiprof[0][2]}, - {wiprof[1][0],wiprof[1][1],wiprof[1][2]}, - {wiprof[2][0],wiprof[2][1],wiprof[2][2]} + {static_cast(wiprof[0][0]),static_cast(wiprof[0][1]),static_cast(wiprof[0][2])}, + {static_cast(wiprof[1][0]),static_cast(wiprof[1][1]),static_cast(wiprof[1][2])}, + {static_cast(wiprof[2][0]),static_cast(wiprof[2][1]),static_cast(wiprof[2][2])} }; TMatrix wprof = iccStore->workingSpaceMatrix (params->icm.working); const float wp[3][3] = { - {wprof[0][0],wprof[0][1],wprof[0][2]}, - {wprof[1][0],wprof[1][1],wprof[1][2]}, - {wprof[2][0],wprof[2][1],wprof[2][2]} + {static_cast(wprof[0][0]),static_cast(wprof[0][1]),static_cast(wprof[0][2])}, + {static_cast(wprof[1][0]),static_cast(wprof[1][1]),static_cast(wprof[1][2])}, + {static_cast(wprof[2][0]),static_cast(wprof[2][1]),static_cast(wprof[2][2])} }; @@ -502,20 +527,15 @@ if(settings->leveldnti ==1) { #endif { float resid=0.f; - float nbresid=0; + float nbresid=0.f; float maxredresid=0.f; float maxblueresid=0.f; float residred=0.f; float residblue=0.f; - int pos; - float** noisevarlum = new float*[(tileheight+1)/2]; - for (int i=0; i<(tileheight+1)/2; i++) - noisevarlum[i] = new float[(tilewidth+1)/2]; - float** noisevarchrom = new float*[(tileheight+1)/2]; - for (int i=0; i<(tileheight+1)/2; i++) - noisevarchrom[i] = new float[(tilewidth+1)/2]; - + int pos; + float* noisevarlum = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; + float* noisevarchrom = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; #ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) @@ -524,31 +544,13 @@ if(settings->leveldnti ==1) { for (int tiletop=0; tiletopleveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PON") {ponder=true;ponderCC=0.5f;} - if(settings->leveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PRE") {ponderCC=0.5f;} - if(settings->leveldnautsimpl==0 && params->dirpyrDenoise.Cmethod=="PREV") {ponderCC=0.5f;} - - float realred2, realred, realblue, realblue2; + int width2 = (width+1)/2; + float realred, realblue; float interm_med =(float) dnparams.chroma/10.0; float intermred, intermblue; if(dnparams.redchro > 0.) intermred=(dnparams.redchro/10.); else intermred= (float) dnparams.redchro/7.0;//increase slower than linear for more sensit @@ -561,74 +563,50 @@ if(settings->leveldnti ==1) { const float noisevarab_b = SQR(realblue); // printf("Ch=%f red=%f blu=%f\n",interm_med, intermred,intermblue ); + + //pixel weight + array2D totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks + + //residual between input and denoised L channel + array2D Ldetail(width,height,ARRAY2D_CLEAR_DATA); + //input L channel array2D Lin(width,height); //wavelet denoised image LabImage * labdn = new LabImage(width,height); - float* mad_LL = new float [height*width]; - - float* mad_aa = new float [height*width]; - float* mad_bb = new float [height*width]; - - //residual between input and denoised L channel - array2D Ldetail(width,height,ARRAY2D_CLEAR_DATA); - //pixel weight - array2D totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks - // init luma noisevarL - float noiseluma=(float) dnparams.luma; - if(perf && noiseLCurve) - noiseluma += 1.f; - float noisevarL = (float) (SQR((noiseluma/125.0)*(1.+ noiseluma/25.0))); - // printf("nova=%f\n",noisevarL); - //TODO: implement using AlignedBufferMP - //fill tile from image; convert RGB to "luma/chroma" + + //fill tile from image; convert RGB to "luma/chroma" + const float maxNoiseVarab = max(noisevarab_b,noisevarab_r); if (isRAW) {//image is raw; use channel differences for chroma channels - if(!perf){//lab mode + if(!denoiseMethodRgb){//lab mode //modification Jacques feb 2013 and july 2014 #ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif - for (int i=tiletop/*, i1=0*/; ir(i,j); float G_ = gain*src->g(i,j); float B_ = gain*src->b(i,j); - //modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 - //we can put other as gamma g=2.6 slope=11, etc. - // but noting to do with real gamma !!!: it's only for data Lab # data RGB - //finally I opted fot gamma55 and with options we can change - if (gamlab == 0) {// options 12/2013 - R_ = Color::igammatab_26_11[R_]; - G_ = Color::igammatab_26_11[G_]; - B_ = Color::igammatab_26_11[B_]; - } - else if (gamlab == 1) { - //other new gamma 4 5 - R_ = Color::igammatab_4[R_]; - G_ = Color::igammatab_4[G_]; - B_ = Color::igammatab_4[B_]; - } - else if (gamlab == 2) { - //new gamma 5.5 10 better for detail luminance..it is a compromise...which depends on the image (distribution BL, ML, HL ...) - R_ = Color::igammatab_55[R_]; - G_ = Color::igammatab_55[G_]; - B_ = Color::igammatab_55[B_]; - } + R_ = (*denoiseigamtab)[R_]; + G_ = (*denoiseigamtab)[G_]; + B_ = (*denoiseigamtab)[B_]; + //apply gamma noise standard (slider) R_ = R_<65535.0f ? gamcurve[R_] : (Color::gammanf(R_/65535.f, gam)*32768.0f); G_ = G_<65535.0f ? gamcurve[G_] : (Color::gammanf(G_/65535.f, gam)*32768.0f); B_ = B_<65535.0f ? gamcurve[B_] : (Color::gammanf(B_/65535.f, gam)*32768.0f); //true conversion xyz=>Lab - float L,a,b; float X,Y,Z; Color::rgbxyz(R_,G_,B_,X,Y,Z,wp); //convert to Lab + float L,a,b; Color::XYZ2Lab(X, Y, Z, L, a, b); labdn->L[i1][j1] = L; @@ -637,28 +615,9 @@ if(settings->leveldnti ==1) { Lin[i1][j1] = L; - if(((i1|j1)&1) == 0) { - if(noiseLCurve) { - float kN = lumcalc[i>>1][j>>1]; //with no gamma and take into account working profile - float epsi = 0.01f; - if(kN<2.f) - kN = 2.f;//avoid divided by zero - if(kN>32768.f) - kN = 32768.f; // not strictly necessary - float kinterm = epsi + noiseLCurve[xdivf(kN,15)*500.f]; - float ki = kinterm*100.f; - ki += noiseluma; - noisevarlum[i1>>1][j1>>1] = SQR((ki/125.f)*(1.f+ki/25.f)); - } - if(noiseCCurve) { - float aN = acalc[i>>1][j>>1]; - float bN = bcalc[i>>1][j>>1]; - float cN = sqrtf(SQR(aN)+SQR(bN)); - if(cN < 100.f) - cN = 100.f;//avoid divided by zero - float Cinterm = 1.f + ponderCC*4.f*noiseCCurve[cN/60.f];//C=f(C) - noisevarchrom[i1>>1][j1>>1] = max(noisevarab_b,noisevarab_r)*SQR(Cinterm); - } + if(((i1|j1)&1) == 0) { + noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; } //end chroma @@ -669,9 +628,9 @@ if(settings->leveldnti ==1) { #ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif - for (int i=tiletop/*, i1=0*/; ir(i,j); @@ -681,36 +640,17 @@ if(settings->leveldnti ==1) { X = X<65535.0f ? gamcurve[X] : (Color::gamma((double)X/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); Y = Y<65535.0f ? gamcurve[Y] : (Color::gamma((double)Y/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); Z = Z<65535.0f ? gamcurve[Z] : (Color::gamma((double)Z/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); - if(((i1|j1)&1) == 0) { - if(noiseLCurve) { - // float noiseluma=(float) dnparams.luma; - float kN = lumcalc[i>>1][j>>1]; - float epsi = 0.01f; - if (kN<2.f) - kN = 2.f; - if (kN>32768.f) - kN = 32768.f; - float kinterm = epsi + noiseLCurve[xdivf(kN,15)*500.f]; - float ki = kinterm*100.f; - ki += noiseluma; - noisevarlum[i1>>1][j1>>1] = SQR((ki/125.f)*(1.f+ki/25.f)); - } - if(noiseCCurve) { - float aN = acalc[i>>1][j>>1]; - float bN = bcalc[i>>1][j>>1]; - float cN = sqrtf(SQR(aN)+SQR(bN)); - if(cN < 100.f) - cN = 100.f;//avoid divided by zero - float Cinterm = 1.f + ponderCC*4.f*noiseCCurve[cN/60.f]; - noisevarchrom[i1>>1][j1>>1] = max(noisevarab_b,noisevarab_r)*SQR(Cinterm); - } - } //end chroma labdn->L[i1][j1] = Y; labdn->a[i1][j1] = (X-Y); labdn->b[i1][j1] = (Y-Z); Lin[i1][j1] = Y; + + if(((i1|j1)&1) == 0) { + noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; + } } } @@ -720,12 +660,12 @@ if(settings->leveldnti ==1) { #ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif - for (int i=tiletop/*, i1=0*/; ir(i,j) ;//for luminance denoise curve + float rLum=src->r(i,j) ;//for denoise curves float gLum=src->g(i,j) ; float bLum=src->b(i,j) ; @@ -746,15 +686,21 @@ if(settings->leveldnti ==1) { //convert Lab Color::XYZ2Lab(X, Y, Z, L, a, b); - float Llum,alum,blum; + labdn->L[i1][j1] = L; + labdn->a[i1][j1] = a; + labdn->b[i1][j1] = b; + + Lin[i1][j1] = L; + if(((i1|j1)&1) == 0) { - if(noiseLCurve || noiseCCurve) { + float Llum,alum,blum; + if(useNoiseLCurve || useNoiseCCurve) { float XL,YL,ZL; Color::rgbxyz(rLum,gLum,bLum,XL,YL,ZL,wp); Color::XYZ2Lab(XL, YL, ZL, Llum, alum, blum); } - if(noiseLCurve) { + if(useNoiseLCurve) { float kN = Llum; float epsi=0.01f; if(kN<2.f) kN=2.f; @@ -762,33 +708,26 @@ if(settings->leveldnti ==1) { float kinterm=epsi + noiseLCurve[xdivf(kN,15)*500.f]; float ki=kinterm*100.f; ki+=noiseluma; - noiseluma += 1.f; - noisevarlum[i1>>1][j1>>1]=SQR((ki/125.f)*(1.f+ki/25.f)); - } - if(noiseCCurve) { + noisevarlum[(i1>>1)*width2+(j1>>1)]=SQR((ki/125.f)*(1.f+ki/25.f)); + } else { + noisevarlum[(i1>>1)*width2+(j1>>1)] = noisevarL; + } + if(useNoiseCCurve) { float aN=alum; float bN=blum; float cN=sqrtf(SQR(aN)+SQR(bN)); if(cN < 100.f) cN=100.f;//avoid divided by zero ??? float Cinterm=1.f + ponderCC*4.f*noiseCCurve[cN/60.f]; - noisevarchrom[i1>>1][j1>>1]=max(noisevarab_b,noisevarab_r)*SQR(Cinterm); - } + noisevarchrom[(i1>>1)*width2+(j1>>1)]= maxNoiseVarab*SQR(Cinterm); + } else { + noisevarchrom[(i1>>1)*width2+(j1>>1)] = 1.f; + } } - labdn->L[i1][j1] = L; - labdn->a[i1][j1] = a; - labdn->b[i1][j1] = b; - - Lin[i1][j1] = L; } } } - //initial impulse denoise, removed in Issue 2557 -// if (dnparams.luma>0.01) { -// impulse_nr (labdn, float(MIN(50.0,dnparams.luma))/20.0f); -// } - int datalen = labdn->W * labdn->H; //now perform basic wavelet denoise @@ -808,27 +747,24 @@ if(settings->leveldnti ==1) { execwavelet=true; if(settings->leveldnautsimpl==1 && (dnparams.Cmethod=="AUT" || dnparams.Cmethod=="PRE")) autoch=true; - if(settings->leveldnautsimpl==0 && dnparams.C2method=="AUTO" || dnparams.C2method=="PREV") + if(settings->leveldnautsimpl==0 && (dnparams.C2method=="AUTO" || dnparams.C2method=="PREV")) autoch=true; if(execwavelet) {//gain time if user choose only median sliders L <=1 slider chrom master < 1 - { // enclosing this code in a block frees about 120 MB before allocating 20 MB after this block (measured with D700 NEF) - wavelet_decomposition* Ldecomp; - wavelet_decomposition* adecomp; - wavelet_decomposition* bdecomp; - int schoice=0;//shrink method - if(dnparams.smethod=="shal") schoice=0; - else if(dnparams.smethod=="shalbi") schoice=2; - - int levwav=5; - float maxreal = max(realred, realblue); - //increase the level of wavelet if user increase much or very much sliders - if( maxreal < 8.f) levwav=5; - else if( maxreal < 10.f)levwav=6; - else if( maxreal < 15.f)levwav=7; - else levwav=8;//maximum ==> I have increase Maxlevel in cplx_wavelet_dec.h from 8 to 9 - if(schoice==2) levwav+=settings->nrwavlevel;//increase level for enhanced mode - if(levwav>8) levwav=8; + wavelet_decomposition* Ldecomp; + wavelet_decomposition* adecomp; + wavelet_decomposition* bdecomp; + + int levwav=5; + float maxreal = max(realred, realblue); + //increase the level of wavelet if user increase much or very much sliders + if( maxreal < 8.f) levwav=5; + else if( maxreal < 10.f)levwav=6; + else if( maxreal < 15.f)levwav=7; + else levwav=8;//maximum ==> I have increase Maxlevel in cplx_wavelet_dec.h from 8 to 9 + if(nrQuality == QUALITY_HIGH) + levwav += settings->nrwavlevel;//increase level for enhanced mode + if(levwav>8) levwav=8; // if (settings->verbose) printf("levwavelet=%i noisevarA=%f noisevarB=%f \n",levwav, noisevarab_r, noisevarab_b ); #ifdef _OPENMP @@ -839,33 +775,36 @@ if(settings->leveldnti ==1) { #pragma omp section #endif { - Ldecomp = new wavelet_decomposition (labdn->data, labdn->W, labdn->H, levwav/*maxlevels*/, 1/*subsampling*/ ); + Ldecomp = new wavelet_decomposition (labdn->data, labdn->W, labdn->H, levwav/*maxlevels*/, 1/*subsampling*/ ); } #ifdef _OPENMP #pragma omp section #endif { - adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H,levwav, 1 ); + adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H,levwav, 1 ); } #ifdef _OPENMP #pragma omp section #endif { - bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1 ); + bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1 ); } } - if(schoice==0) WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, mad_LL, mad_aa, mad_bb, noisevarab_r, noisevarab_b,labdn, noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, schoice, autoch, perf );//enhance mode - if(schoice==2) { - WaveletDenoiseAll_BiShrink(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, mad_LL, mad_aa, mad_bb, noisevarab_r, noisevarab_b,labdn, noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, schoice, autoch, perf );//enhance mode - WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, mad_LL, mad_aa, mad_bb, noisevarab_r, noisevarab_b,labdn, noiseLCurve, noiseCCurve, chaut ,redaut, blueaut, maxredaut, maxblueaut, schoice, autoch, perf ); + if(nrQuality==QUALITY_STANDARD) + WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb );//enhance mode + else /*if(nrQuality==QUALITY_HIGH)*/ { + WaveletDenoiseAll_BiShrink(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb );//enhance mode + WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut ,redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb ); } - float chresid,chmaxredresid,chmaxblueresid,chresidred, chresidblue; - //kall=0 call by Dcrop - resid=0.f;residred=0.f;residblue=0.f; - if(kall==0) Noise_residual(*Ldecomp, *adecomp, *bdecomp, width, height, chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue, resid, residblue, residred, maxredresid, maxblueresid, nbresid); - //printf("NoiRESID=%3.1f maxR=%3.1f maxB=%3.1f red=%3.1f blue=%3.1f\n",chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue); - nresi=chresid; - highresi=chresid + 0.66f*(max(chmaxredresid,chmaxblueresid) - chresid);//evaluate sigma + //kall=0 call by Dcrop + if(kall==0) { + float chresid,chmaxredresid,chmaxblueresid,chresidred, chresidblue; + resid=0.f;residred=0.f;residblue=0.f; + Noise_residual(*Ldecomp, *adecomp, *bdecomp, width, height, chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue, resid, residblue, residred, maxredresid, maxblueresid, nbresid, denoiseMethodRgb); + //printf("NoiRESID=%3.1f maxR=%3.1f maxB=%3.1f red=%3.1f blue=%3.1f\n",chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue); + nresi=chresid; + highresi=chresid + 0.66f*(max(chmaxredresid,chmaxblueresid) - chresid);//evaluate sigma + } #ifdef _OPENMP #pragma omp parallel sections num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif @@ -874,34 +813,26 @@ if(settings->leveldnti ==1) { #pragma omp section #endif { - Ldecomp->reconstruct(labdn->data); - delete Ldecomp; + Ldecomp->reconstruct(labdn->data); + delete Ldecomp; } #ifdef _OPENMP #pragma omp section #endif { - adecomp->reconstruct(labdn->data+datalen); - delete adecomp; + adecomp->reconstruct(labdn->data+datalen); + delete adecomp; } #ifdef _OPENMP #pragma omp section #endif { - bdecomp->reconstruct(labdn->data+2*datalen); - delete bdecomp; + bdecomp->reconstruct(labdn->data+2*datalen); + delete bdecomp; } } } - } - //TODO: at this point wavelet coefficients storage can be freed - //Issue 1680: Done now - //second impulse denoise, removed in Issue 2557 -// if (dnparams.luma>0.01) { -// impulse_nr (labdn, MIN(50.0f,(float)dnparams.luma)/20.0f); -// } - //PF_correct_RT(dst, dst, defringe.radius, defringe.threshold); int metchoice=0; @@ -911,7 +842,7 @@ if(settings->leveldnti ==1) { else if(dnparams.methodmed=="Lpab") metchoice=4; //median on Luminance Lab only - if( (metchoice==1 || metchoice==2 || metchoice==3 || metchoice==4) && dnparams.median) { + if( (metchoice==1 || metchoice==2 || metchoice==3 || metchoice==4) && dnparams.median) { //printf("Lab et Lonly \n"); float** tmL; int wid=labdn->W; @@ -977,7 +908,7 @@ if(settings->leveldnti ==1) { #pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif for (int i=1; iL[i][j] ,labdn->L[i-1][j], labdn->L[i+1][j] ,labdn->L[i][j+1],labdn->L[i][j-1], tmL[i][j]);//3x3 soft @@ -1249,14 +1180,13 @@ if(settings->leveldnti ==1) { const int numblox_W = ceil(((float)(width))/(offset))+2*blkrad; const int numblox_H = ceil(((float)(height))/(offset))+2*blkrad; - //const int nrtiles = numblox_W*numblox_H; // end of tiling calc { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // Main detail recovery algorithm: Block loop - //DCT block data storage + //DCT block data storage #ifdef _OPENMP @@ -1283,7 +1213,6 @@ if(settings->leveldnti ==1) { int top = (vblk-blkrad)*offset; float * datarow = pBuf +blkrad*offset; - for (int i=0/*, row=top*/; ileveldnti ==1) { //now fill this row of the blocks with Lab high pass data for (int hblk=0; hblk=0 && top+i=0 && left+j=0 && top+ileveldnti ==1) { } //convert back to RGB and write to destination array if (isRAW) { - if(!perf) {//Lab mode - realred /= 100.f; - realblue /= 100.f; + if(!denoiseMethodRgb) {//Lab mode + realred /= 100.f; + realblue /= 100.f; #ifdef _OPENMP #pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) #endif for (int i=tiletop; ixyz - L = labdn->L[i1][j1]; - a = labdn->a[i1][j1]; - b = labdn->b[i1][j1]; + float L = labdn->L[i1][j1]; + float a = labdn->a[i1][j1]; + float b = labdn->b[i1][j1]; float c_h=SQR(a)+SQR(b); if(c_h>9000000.f){ - a *= 1.f + Qhigh*realred; - b *= 1.f + Qhigh*realblue; + a *= 1.f + qhighFactor*realred; + b *= 1.f + qhighFactor*realblue; } //convert XYZ + float X,Y,Z; Color::Lab2XYZ(L, a, b, X, Y, Z); //apply inverse gamma noise float r_,g_,b_; @@ -1420,22 +1360,12 @@ if(settings->leveldnti ==1) { r_ = r_<32768.f ? igamcurve[r_] : (Color::gammanf(r_/32768.f, igam) * 65535.f); g_ = g_<32768.f ? igamcurve[g_] : (Color::gammanf(g_/32768.f, igam) * 65535.f); b_ = b_<32768.f ? igamcurve[b_] : (Color::gammanf(b_/32768.f, igam) * 65535.f); - //readapt arbitrary gamma (inverse from beginning) - if (gamlab == 0) { - r_ = Color::gammatab_26_11[r_]; - g_ = Color::gammatab_26_11[g_]; - b_ = Color::gammatab_26_11[b_]; - } - else if (gamlab == 1) { - r_ = Color::gammatab_4[r_]; - g_ = Color::gammatab_4[g_]; - b_ = Color::gammatab_4[b_]; - } - else if (gamlab == 2) { - r_ = Color::gammatab_55[r_]; - g_ = Color::gammatab_55[g_]; - b_ = Color::gammatab_55[b_]; - } + + //readapt arbitrary gamma (inverse from beginning) + r_ = (*denoisegamtab)[r_]; + g_ = (*denoisegamtab)[g_]; + b_ = (*denoisegamtab)[b_]; + float factor = Vmask[i1]*Hmask[j1]; dsttmp->r(i,j) += factor*r_; @@ -1448,17 +1378,16 @@ if(settings->leveldnti ==1) { else {//RGB mode for (int i=tiletop; ia[i1][j1])+SQR(labdn->b[i1][j1])); if(c_h>3000.f){ - labdn->a[i1][j1]*=1.f + Qhigh*realred/100.f; - labdn->b[i1][j1]*=1.f + Qhigh*realblue/100.f; + labdn->a[i1][j1]*=1.f + qhighFactor*realred/100.f; + labdn->b[i1][j1]*=1.f + qhighFactor*realblue/100.f; } - Y = labdn->L[i1][j1]; - X = (labdn->a[i1][j1]) + Y; - Z = Y - (labdn->b[i1][j1]); + float Y = labdn->L[i1][j1]; + float X = (labdn->a[i1][j1]) + Y; + float Z = Y - (labdn->b[i1][j1]); X = X<32768.0f ? igamcurve[X] : (Color::gamma((float)X/32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); @@ -1478,19 +1407,19 @@ if(settings->leveldnti ==1) { } else { for (int i=tiletop; iL[i1][j1]; - a = labdn->a[i1][j1]; - b = labdn->b[i1][j1]; + float L = labdn->L[i1][j1]; + float a = labdn->a[i1][j1]; + float b = labdn->b[i1][j1]; float c_h=sqrt(SQR(a)+SQR(b)); if(c_h>3000.f){ - a*=1.f + Qhigh*realred/100.f; - b*=1.f + Qhigh*realblue/100.f; + a*=1.f + qhighFactor*realred/100.f; + b*=1.f + qhighFactor*realblue/100.f; } + float X,Y,Z; Color::Lab2XYZ(L, a, b, X, Y, Z); float factor = Vmask[i1]*Hmask[j1]; @@ -1512,20 +1441,14 @@ if(settings->leveldnti ==1) { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% delete labdn; - delete [] mad_LL; - delete [] mad_aa; - delete [] mad_bb; }//end of tile row }//end of tile loop - for (int i=0; i<(tileheight+1)/2; i++) - delete [] noisevarlum[i]; delete [] noisevarlum; - for (int i=0; i<(tileheight+1)/2; i++) - delete [] noisevarchrom[i]; delete [] noisevarchrom; } + for(int i=0;ir(i,j),source->r(i-1,j),source->r(i+1,j),source->r(i,j+1),source->r(i,j-1),source->r(i-1,j-1),source->r(i-1,j+1),source->r(i+1,j-1),source->r(i+1,j+1), source->r(i-2,j),source->r(i+2,j),source->r(i,j+2),source->r(i,j-2),source->r(i-2,j-2),source->r(i-2,j+2),source->r(i+2,j-2),source->r(i+2,j+2), source->r(i-2,j+1),source->r(i+2,j+1),source->r(i-1,j+2),source->r(i-1,j-2),source->r(i-2,j-1),source->r(i+2,j-1),source->r(i+1,j+2),source->r(i+1,j-2), tm[i][j]);//5x5 - } - else + } + } else for (int j=2; jr(i,j);pp[1]=source->r(i-1,j); pp[2]=source->r(i+1,j);pp[3]=source->r(i,j+1);pp[4]=source->r(i,j-1);pp[5]=source->r(i-1,j-1);pp[6]=source->r(i-1,j+1); pp[7]=source->r(i+1,j-1);pp[8]=source->r(i+1,j+1);pp[9]=source->r(i+2,j);pp[10]=source->r(i-2,j);pp[11]=source->r(i,j+2);pp[12]=source->r(i,j-2); @@ -1647,15 +1571,16 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ } else { #pragma omp for for (int i=2; ib(i,j),source->b(i-1,j),source->b(i+1,j),source->b(i,j+1),source->b(i,j-1),source->b(i-1,j-1),source->b(i-1,j+1),source->b(i+1,j-1),source->b(i+1,j+1), source->b(i-2,j),source->b(i+2,j),source->b(i,j+2),source->b(i,j-2),source->b(i-2,j-2),source->b(i-2,j+2),source->b(i+2,j-2),source->b(i+2,j+2), source->b(i-2,j+1),source->b(i+2,j+1),source->b(i-1,j+2),source->b(i-1,j-2),source->b(i-2,j-1),source->b(i+2,j-1),source->b(i+1,j+2),source->b(i+1,j-2), tm[i][j]);//5x5 - } - else + } + } else for (int j=2; jb(i,j);pp[1]=source->b(i-1,j); pp[2]=source->b(i+1,j);pp[3]=source->b(i,j+1);pp[4]=source->b(i,j-1);pp[5]=source->b(i-1,j-1);pp[6]=source->b(i-1,j+1); pp[7]=source->b(i+1,j-1);pp[8]=source->b(i+1,j+1);pp[9]=source->b(i+2,j);pp[10]=source->b(i-2,j);pp[11]=source->b(i,j+2);pp[12]=source->b(i,j-2); @@ -1691,15 +1616,16 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ } else { #pragma omp for for (int i=2; ig(i,j),source->g(i-1,j),source->g(i+1,j),source->g(i,j+1),source->g(i,j-1),source->g(i-1,j-1),source->g(i-1,j+1),source->g(i+1,j-1),source->g(i+1,j+1), source->g(i-2,j),source->g(i+2,j),source->g(i,j+2),source->g(i,j-2),source->g(i-2,j-2),source->g(i-2,j+2),source->g(i+2,j-2),source->g(i+2,j+2), source->g(i-2,j+1),source->g(i+2,j+1),source->g(i-1,j+2),source->g(i-1,j-2),source->g(i-2,j-1),source->g(i+2,j-1),source->g(i+1,j+2),source->g(i+1,j-2), tm[i][j]);//5x5 - } - else + } + } else for (int j=2; jg(i,j);pp[1]=source->g(i-1,j); pp[2]=source->g(i+1,j);pp[3]=source->g(i,j+1);pp[4]=source->g(i,j-1);pp[5]=source->g(i-1,j-1);pp[6]=source->g(i-1,j+1); pp[7]=source->g(i+1,j-1);pp[8]=source->g(i+1,j+1);pp[9]=source->g(i+2,j);pp[10]=source->g(i-2,j);pp[11]=source->g(i,j+2);pp[12]=source->g(i,j-2); @@ -1725,16 +1651,13 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ } //end median - if(noiseLCurve || noiseCCurve) { - for (int i=0; i<(hei+1)/2; i++) + if(noiseLCurve || useNoiseCCurve) { + for (int i=0; i<(hei); i++) delete [] lumcalc[i]; delete [] lumcalc; - for (int i=0; i<(hei+1)/2; i++) - delete [] acalc[i]; - delete [] acalc; - for (int i=0; i<(hei+1)/2; i++) - delete [] bcalc[i]; - delete [] bcalc; + for (int i=0; i<(hei); i++) + delete [] ccalc[i]; + delete [] ccalc; } @@ -1764,7 +1687,7 @@ SSEFUNCTION void ImProcFunctions::RGBtile_denoise (float * fLblox, int hblproc, __m128 noisevar_Ldetailv = _mm_set1_ps( noisevar_Ldetail ); __m128 onev = _mm_set1_ps( 1.0f ); for (int n=0; n 5.f ){ - for (int i=0; i0.001f || noisevar_abb>0.001f) { #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); + __m128 onev = _mm_set1_ps(1.f); + __m128 mad_arv = _mm_set1_ps(mad_ar); + __m128 mad_brv = _mm_set1_ps(mad_br); __m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f); __m128 mad_av; __m128 mad_bv; __m128 mag_Lv, mag_av, mag_bv; __m128 tempav, tempbv; int coeffloc_ab; - for (coeffloc_ab=0; coeffloc_ab0.00001f) { - if (!noiseLCurve || noiseLCurve.getSum() < 7.f ) { - for (int i=0; i= 7.f) { - for (int i=0; i maxHL) maxHL = WaveletCoeffs_L.level_H(lvl); } - #ifdef _OPENMP #pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) @@ -2223,12 +2121,8 @@ SSEFUNCTION void ImProcFunctions::WaveletDenoiseAll_BiShrink(wavelet_decompositi float ** WavCoeffs_a = WaveletCoeffs_a.level_coeffs(lvl); float ** WavCoeffs_b = WaveletCoeffs_b.level_coeffs(lvl); - int callby=0; - if(schoice==0) callby=0; - if(schoice==2) callby=1; - ShrinkAll(WavCoeffs_L, WavCoeffs_a, WavCoeffs_b, buffer, lvl, Wlvl_L, Hlvl_L, Wlvl_ab, Hlvl_ab, - skip_L, skip_ab, noisevar_L, noisevarlum, noisevarchrom, width, height, mad_LL, mad_aa, mad_bb, noisevar_abr, noisevar_abb, noi, noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, callby, autoch, perf); + skip_L, skip_ab, noisevar_L, noisevarlum, noisevarchrom, width, height, noisevar_abr, noisevar_abb, noi, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb); } for(int i=4;i>=0;i--) @@ -2239,8 +2133,8 @@ SSEFUNCTION void ImProcFunctions::WaveletDenoiseAll_BiShrink(wavelet_decompositi //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SSEFUNCTION void ImProcFunctions::ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, float **buffer, int level, - int W_L, int H_L, int W_ab, int H_ab,int skip_L, int skip_ab, float noisevar_L, float **noisevarlum, float **noisevarchrom, int width, int height, float * mad_LL, float * mad_aa, float * mad_bb, float noisevar_abr, float noisevar_abb, LabImage * noi, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &chaut, float &redaut, float &blueaut, - float &maxredaut, float &maxblueaut, int callby, bool autoch, bool perf, float * madaa, float * madab, float * madaL, bool madCalculated ) + int W_L, int H_L, int W_ab, int H_ab,int skip_L, int skip_ab, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, + float &maxredaut, float &maxblueaut, bool autoch, bool denoiseMethodRgb, float * madaa, float * madab, float * madaL, bool madCalculated ) { //simple wavelet shrinkage @@ -2265,7 +2159,7 @@ SSEFUNCTION void ImProcFunctions::ShrinkAll(float ** WavCoeffs_L, float ** WavCo madb = madab[dir-1]; madL = madaL[dir-1] ; } else { - if(!perf) { + if(!denoiseMethodRgb) { madL = SQR(Mad(WavCoeffs_L[dir], W_L*H_L)); mada = SQR(Mad(WavCoeffs_a[dir], W_ab*H_ab)); madb = SQR(Mad(WavCoeffs_b[dir], W_ab*H_ab)); @@ -2275,35 +2169,24 @@ SSEFUNCTION void ImProcFunctions::ShrinkAll(float ** WavCoeffs_L, float ** WavCo madb = SQR(MadRgb(WavCoeffs_b[dir], W_ab*H_ab)); } } - - if (!noiseCCurve || noiseCCurve.getSum() < 5.f ){ - for (int i=0; i 5.f ){ - for (int i=0; i0.001f || noisevar_abb>0.001f ) { + mada = useNoiseCCurve ? mada : mada * noisevar_abr; + madb = useNoiseCCurve ? madb : madb * noisevar_abb; #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); - __m128 rmadLm9v = onev / _mm_set1_ps(madL * 9.f); + __m128 onev = _mm_set1_ps(1.f); + __m128 mad_arv = _mm_set1_ps(mada); + __m128 mad_brv = _mm_set1_ps(madb); + + __m128 rmadLm9v = onev / _mm_set1_ps(madL * 9.f); __m128 mad_av ; __m128 mad_bv; __m128 mag_Lv, mag_av, mag_bv; int coeffloc_ab; for (coeffloc_ab=0; coeffloc_ab0.00001f) { - if (!noiseLCurve || noiseLCurve.getSum() < 7.f ){//under 7 quasi no action - for (int i=0; i= 7.f) { - float precalc = madL * 5.f / (float)(level+1); - for (int i=0; inrhigh; chaut = (chaut*Nb-maxmax)/(Nb-1);//suppress maximum for chaut calcul - if (redyel > 5000.f || skinc > 1000.f && nsknc < 0.4f && chromina > 3000.f) + if ((redyel > 5000.f || skinc > 1000.f) && nsknc < 0.4f && chromina > 3000.f) chaut *= 0.45f;//reduct action in red zone, except skin for high / med chroma - else if (redyel>12000.f || skinc > 1200.f && nsknc < 0.3f && chromina > 3000.f) + else if ((redyel>12000.f || skinc > 1200.f) && nsknc < 0.3f && chromina > 3000.f) chaut *= 0.3f; if (mode==0 || mode==2) {//Preview or Auto multizone @@ -2664,7 +2532,7 @@ void ImProcFunctions::calcautodn_info (float &chaut, float &delta, int Nb, int l } SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &nresi, float &highresi, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) -{ +{ if ((settings->leveldnautsimpl==1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl==0 && dnparams.C2method == "MANU")) { //nothing to do return; @@ -2678,11 +2546,12 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat wid=provicalc->width; TMatrix wprofi = iccStore->workingSpaceMatrix (params->icm.working); - double wpi[3][3] = { - {wprofi[0][0],wprofi[0][1],wprofi[0][2]}, - {wprofi[1][0],wprofi[1][1],wprofi[1][2]}, - {wprofi[2][0],wprofi[2][1],wprofi[2][2]} - }; + const float wpi[3][3] = { + {static_cast(wprofi[0][0]),static_cast(wprofi[0][1]),static_cast(wprofi[0][2])}, + {static_cast(wprofi[1][0]),static_cast(wprofi[1][1]),static_cast(wprofi[1][2])}, + {static_cast(wprofi[2][0]),static_cast(wprofi[2][1]),static_cast(wprofi[2][2])} + }; + lumcalc = new float*[hei]; for (int i=0; iworkingSpaceMatrix (params->icm.working); - - double wp[3][3] = { - {wprof[0][0],wprof[0][1],wprof[0][2]}, - {wprof[1][0],wprof[1][1],wprof[1][2]}, - {wprof[2][0],wprof[2][1],wprof[2][2]} + const float wp[3][3] = { + {static_cast(wprof[0][0]),static_cast(wprof[0][1]),static_cast(wprof[0][2])}, + {static_cast(wprof[1][0]),static_cast(wprof[1][1]),static_cast(wprof[1][2])}, + {static_cast(wprof[2][0]),static_cast(wprof[2][1]),static_cast(wprof[2][2])} }; float chau=0.f; @@ -2761,6 +2627,18 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float minchblue=100000000.f; int nb=0; int comptlevel=0; + + // To avoid branches in loops we access the gammatabs by pointers + LUTf *denoiseigamtab; + switch(settings->denoiselabgamma) { + case 0: denoiseigamtab = &(Color::igammatab_26_11); + break; + case 1: denoiseigamtab = &(Color::igammatab_4); + break; + default: denoiseigamtab = &(Color::igammatab_55); + break; + } + for (int tiletop=0; tiletop>1][j>>1]); + bNv = LVFU(bcalc[i>>1][j>>1]); + _mm_storeu_ps(&noisevarhue[i1>>1][j1>>1], xatan2f(bNv,aNv)); + _mm_storeu_ps(&noisevarchrom[i1>>1][j1>>1], _mm_max_ps(c100v,_mm_sqrt_ps(SQRV(aNv)+SQRV(bNv)))); } - for (; j>1][j>>1]; + float bN = bcalc[i>>1][j>>1]; float cN = sqrtf(SQR(aN)+SQR(bN)); - noisevarhue[i1][j1] = xatan2f(bN,aN); + noisevarhue[i1>>1][j1>>1] = xatan2f(bN,aN); if(cN < 100.f) cN=100.f;//avoid divided by zero - noisevarchrom[i1][j1] = cN; + noisevarchrom[i1>>1][j1>>1] = cN; } #else - for (int j=tileleft; j>1][j>>1]; + float bN = bcalc[i>>1][j>>1]; float cN = sqrtf(SQR(aN)+SQR(bN)); float hN = xatan2f(bN,aN); if(cN < 100.f) cN = 100.f;//avoid divided by zero - noisevarchrom[i1][j1] = cN; - noisevarhue[i1][j1] = hN; + noisevarchrom[i1>>1][j1>>1] = cN; + noisevarhue[i1>>1][j1>>1] = hN; } #endif } #ifdef _OPENMP #pragma omp parallel for if(multiThread) #endif - for (int i=tiletop; i>1][j>>1]; Llum = Llum < 2.f ? 2.f : Llum; //avoid divided by zero ? Llum = Llum > 32768.f ? 32768.f : Llum; // not strictly necessary - noisevarlum[i1][j1]= Llum; + noisevarlum[i1>>1][j1>>1]= Llum; } } if (!perf){//lab mode, modification Jacques feb 2013 and july 2014 @@ -2875,24 +2749,10 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float G_ = gain*src->g(i,j); float B_ = gain*src->b(i,j); - //finally I opted fot gamma55 and with options we can change - if (gamlab == 0) {// options 12/2013 - R_ = Color::igammatab_26_11[R_]; - G_ = Color::igammatab_26_11[G_]; - B_ = Color::igammatab_26_11[B_]; - } - else if (gamlab == 1) { - //other new gamma 4 5 - R_ = Color::igammatab_4[R_]; - G_ = Color::igammatab_4[G_]; - B_ = Color::igammatab_4[B_]; - } - else if (gamlab == 2) { - //new gamma 5.5 10 better for detail luminance..it is a compromise...which depends on the image (distribution BL, ML, HL ...) - R_ = Color::igammatab_55[R_]; - G_ = Color::igammatab_55[G_]; - B_ = Color::igammatab_55[B_]; - } + R_ = (*denoiseigamtab)[R_]; + G_ = (*denoiseigamtab)[G_]; + B_ = (*denoiseigamtab)[B_]; + //apply gamma noise standard (slider) R_ = R_<65535.0f ? gamcurve[R_] : (Color::gamman((double)R_/65535.0, gam)*32768.0f); G_ = G_<65535.0f ? gamcurve[G_] : (Color::gamman((double)G_/65535.0, gam)*32768.0f); @@ -2958,25 +2818,24 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat //convert Lab Color::XYZ2Lab(X, Y, Z, L, a, b); - float Llum,alum,blum; - float XL,YL,ZL; - Color::rgbxyz(rLum,gLum,bLum,XL,YL,ZL,wp); - Color::XYZ2Lab(XL, YL, ZL, Llum, alum, blum); - - float kN=Llum; - float epsi=0.01f; - if(kN<2.f) kN=2.f; - if(kN>32768.f) kN=32768.f; - noisevarlum[i1][j1]=kN; - float aN=alum; - float bN=blum; - float hN=xatan2f(bN,aN); - float cN=sqrt(SQR(aN)+SQR(bN)); - if(cN < 100.f) cN=100.f;//avoid divided by zero - noisevarchrom[i1][j1]=cN; - noisevarhue[i1][j1]=hN; - + if(((i1|j1)&1) == 0) { + float Llum,alum,blum; + float XL,YL,ZL; + Color::rgbxyz(rLum,gLum,bLum,XL,YL,ZL,wp); + Color::XYZ2Lab(XL, YL, ZL, Llum, alum, blum); + float kN=Llum; + if(kN<2.f) kN=2.f; + if(kN>32768.f) kN=32768.f; + noisevarlum[i1>>1][j1>>1]=kN; + float aN=alum; + float bN=blum; + float hN=xatan2f(bN,aN); + float cN=sqrt(SQR(aN)+SQR(bN)); + if(cN < 100.f) cN=100.f;//avoid divided by zero + noisevarchrom[i1>>1][j1>>1]=cN; + noisevarhue[i1>>1][j1>>1]=hN; + } labdn->a[i1][j1] = a; labdn->b[i1][j1] = b; } @@ -3018,27 +2877,24 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat } } bool autoch = dnparams.autochroma; - if(comptlevel==0) WaveletDenoiseAll_info(levwav, *adecomp, *bdecomp, noisevarlum, noisevarchrom, noisevarhue, width, height, mad_aa, mad_bb, noisevarab_r, noisevarab_b, labdn, chaut, Nb, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, schoice, autoch, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc,maxchred, maxchblue, minchred, minchblue, nb,chau ,chred, chblue, perf, multiThread);//enhance mode + if(comptlevel==0) WaveletDenoiseAll_info(levwav, *adecomp, *bdecomp, noisevarlum, noisevarchrom, noisevarhue, width, height, noisevarab_r, noisevarab_b, labdn, chaut, Nb, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, schoice, autoch, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc,maxchred, maxchblue, minchred, minchblue, nb,chau ,chred, chblue, perf, multiThread);//enhance mode comptlevel+=1; - float chresid,chmaxredresid,chmaxblueresid,chresidred, chresidblue; + float chresid,chmaxredresid,chmaxblueresid; nresi=chresid; highresi=chresid + 0.66f*(max(chmaxredresid,chmaxblueresid) - chresid);//evaluate sigma delete adecomp; delete bdecomp; delete labdn; - for (int i=0; i SSEFUNCTION void boxabsblur (T* src, A* dst, int radx __m128 tempv,lenv,lenp1v,lenm1v,rlenv; for (int col = 0; col < W-3; col+=4) { lenv = leninitv; - tempv = LVFU(temp[0*W+col]); + tempv = LVF(temp[0*W+col]); for (int i=1; i<=rady; i++) { - tempv = tempv + LVFU(temp[i*W+col]); + tempv = tempv + LVF(temp[i*W+col]); } tempv = tempv / lenv; - _mm_storeu_ps( &dst[0*W+col], tempv ); + STVF(dst[0*W+col], tempv); for (int row=1; row<=rady; row++) { lenp1v = lenv + onev; - tempv = (tempv*lenv + LVFU(temp[(row+rady)*W+col]))/lenp1v; - _mm_storeu_ps( &dst[row*W+col],tempv); + tempv = (tempv*lenv + LVF(temp[(row+rady)*W+col]))/lenp1v; + STVF(dst[row*W+col],tempv); lenv = lenp1v; } rlenv = onev / lenv; for (int row = rady+1; row < H-rady; row++) { - tempv = tempv + (LVFU(temp[(row+rady)*W+col])- LVFU(temp[(row-rady-1)*W+col]))*rlenv; - _mm_storeu_ps( &dst[row*W+col], tempv); + tempv = tempv + (LVF(temp[(row+rady)*W+col])- LVF(temp[(row-rady-1)*W+col]))*rlenv; + STVF(dst[row*W+col], tempv); } for (int row=H-rady; row - wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling); + wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop = 1); ~wavelet_decomposition(); @@ -72,11 +72,6 @@ namespace rtengine { return wavelet_decomp[level]->height(); } - int level_pad(int level) const - { - return wavelet_decomp[level]->padding(); - } - int level_stride(int level) const { return wavelet_decomp[level]->stride(); @@ -96,7 +91,7 @@ namespace rtengine { }; template - wavelet_decomposition::wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling) + wavelet_decomposition::wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop) : lvltot(0), subsamp(subsampling), m_w(width), m_h(height), coeff0(NULL) { //initialize wavelet filters @@ -116,21 +111,20 @@ namespace rtengine { // after coefficient rotation, data structure is: // wavelet_decomp[scale][channel={lo,hi1,hi2,hi3}][pixel_array] - int padding = 0;//pow(2, maxlvl);//must be a multiple of two lvltot=0; E *buffer[2]; buffer[0] = new E[(m_w/2+1)*(m_h/2+1)]; buffer[1] = new E[(m_w/2+1)*(m_h/2+1)]; int bufferindex = 0; - wavelet_decomp[lvltot] = new wavelet_level(src, buffer[bufferindex^1], lvltot/*level*/, subsamp, padding/*padding*/, m_w, m_h, \ - wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset); + wavelet_decomp[lvltot] = new wavelet_level(src, buffer[bufferindex^1], lvltot/*level*/, subsamp, m_w, m_h, \ + wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop); while(lvltot < maxlvl-1) { lvltot++; bufferindex ^= 1; - wavelet_decomp[lvltot] = new wavelet_level(buffer[bufferindex], buffer[bufferindex^1]/*lopass*/, lvltot/*level*/, subsamp, 0/*no padding*/, \ + wavelet_decomp[lvltot] = new wavelet_level(buffer[bufferindex], buffer[bufferindex^1]/*lopass*/, lvltot/*level*/, subsamp, \ wavelet_decomp[lvltot-1]->width(), wavelet_decomp[lvltot-1]->height(), \ - wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset); + wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop); } coeff0 = buffer[bufferindex^1]; delete[] buffer[bufferindex]; diff --git a/rtengine/cplx_wavelet_level.h b/rtengine/cplx_wavelet_level.h index 874476c9a..dabd8c8ff 100644 --- a/rtengine/cplx_wavelet_level.h +++ b/rtengine/cplx_wavelet_level.h @@ -31,9 +31,6 @@ namespace rtengine { class wavelet_level { - // size of padded border - size_t m_pad; - // level of decomposition int lvl; @@ -49,19 +46,27 @@ namespace rtengine { // load a row/column of input data, possibly with padding - void AnalysisFilterHaarVertical (T * srcbuffer, T * dstLo, T * dstHi, int pitch, int srclen, int row); - void AnalysisFilterHaarHorizontal (T * srcbuffer, T * dstLo, T * dstHi, int srclen, int row); - void SynthesisFilterHaarHorizontal (T * srcLo, T * srcHi, T * dst, int dstlen); - void SynthesisFilterHaarVertical (T * srcLo, T * srcHi, T * dst, int pitch, int dstlen); + void AnalysisFilterHaarVertical (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int height, const int row); + void AnalysisFilterHaarHorizontal (const T * const srcbuffer, T * dstLo, T * dstHi, const int width, const int row); + void SynthesisFilterHaarHorizontal (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height); + void SynthesisFilterHaarVertical (const T * const srcLo, const T * const srcHi, T * dst, const int width, const int height); void AnalysisFilterSubsampHorizontal (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi, - int taps, int offset, int pitch, int srclen, int m_w2, int row); + const int taps, const int offset, const int srcwidth, const int dstwidth, const int row); +#ifdef __SSE2__ + void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, T * dstHi, float (*filterLo)[4], float (*filterHi)[4], + const int taps, const int offset, const int width, const int height, const int row); +#else void AnalysisFilterSubsampVertical (T * srcbuffer, T * dstLo, T * dstHi, float *filterLo, float *filterHi, - int taps, int offset, int pitch, int srclen, int row); + int const taps, const int offset, const int width, const int height, const int row); +#endif void SynthesisFilterSubsampHorizontal (T * srcLo, T * srcHi, T * dst, - float *filterLo, float *filterHi, int taps, int offset, int dstlen); - void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, int taps, int offset, int pitch, int dstlen); - + float *filterLo, float *filterHi, const int taps, const int offset, const int scrwidth, const int dstwidth, const int height); +#ifdef __SSE2__ + void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float (*filterLo)[4], float (*filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight); +#else + void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight); +#endif public: T ** wavcoeffs; @@ -72,18 +77,20 @@ namespace rtengine { size_t m_w2, m_h2; template - wavelet_level(E * src, E * dst, int level, int subsamp, int padding, size_t w, size_t h, float *filterV, float *filterH, int len, int offset) - : m_w(w), m_h(h), m_w2(w), m_h2(h), m_pad(padding), wavcoeffs(NULL), lvl(level), skip(1<>level)&1) + wavelet_level(E * src, E * dst, int level, int subsamp, size_t w, size_t h, float *filterV, float *filterH, int len, int offset, int skipcrop) + : lvl(level), subsamp_out((subsamp>>level)&1), skip(1<>n)&1); - } + } + skip /= skipcrop; + if(skip < 1) skip=1; + } - m_w2 = (subsamp_out ? ((w+1+2*skip*padding)/2) : (w+2*skip*padding)); - m_h2 = (subsamp_out ? ((h+1+2*skip*padding)/2) : (h+2*skip*padding)); - m_pad= skip*padding; + m_w2 = (subsamp_out ? (w+1)/2 : w); + m_h2 = (subsamp_out ? (h+1)/2 : h); wavcoeffs = create((m_w2)*(m_h2)); decompose_level(src, dst, filterV, filterH, len, offset); @@ -115,11 +122,6 @@ namespace rtengine { return m_h2; } - size_t padding() const - { - return m_pad/skip; - } - size_t stride() const { return skip; @@ -151,33 +153,33 @@ namespace rtengine { } template - void wavelet_level::AnalysisFilterHaarHorizontal (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, int srclen, int row) { + void wavelet_level::AnalysisFilterHaarHorizontal (const T * const RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, const int width, const int row) { /* Basic convolution code * Applies a Haar filter */ - for(int i = 0; i < (srclen - skip); i++) { - dstLo[row*srclen+i] = (srcbuffer[i] + srcbuffer[i+skip]); - dstHi[row*srclen+i] = (srcbuffer[i] - srcbuffer[i+skip]); + for(int i = 0; i < (width - skip); i++) { + dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i+skip]); + dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i+skip]); } - for(size_t i = max(srclen-skip,skip); i < (srclen); i++) { - dstLo[row*srclen+i] = (srcbuffer[i] + srcbuffer[i-skip]); - dstHi[row*srclen+i] = (srcbuffer[i] - srcbuffer[i-skip]); + for(size_t i = max(width-skip,skip); i < (width); i++) { + dstLo[row*width+i] = (srcbuffer[i] + srcbuffer[i-skip]); + dstHi[row*width+i] = (srcbuffer[i] - srcbuffer[i-skip]); } } - template void wavelet_level::AnalysisFilterHaarVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, int pitch, int srclen, int row) { + template void wavelet_level::AnalysisFilterHaarVertical (const T * const RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, const int width, const int height, const int row) { /* Basic convolution code * Applies a Haar filter */ - if(row < (srclen - skip)) { - for(int j=0;j=max(srclen-skip,skip)) { - for(int j=0;j=max(height-skip,skip)) { + for(int j=0;j void wavelet_level::SynthesisFilterHaarHorizontal (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, int dstlen) { + template void wavelet_level::SynthesisFilterHaarHorizontal (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) { /* Basic convolution code * Applies a Haar filter * */ - for (int k=0; k void wavelet_level::SynthesisFilterHaarVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, int pitch, int dstlen) { + template void wavelet_level::SynthesisFilterHaarVertical (const T * const RESTRICT srcLo, const T * const RESTRICT srcHi, T * RESTRICT dst, const int width, const int height) { /* Basic convolution code * Applies a Haar filter * */ - for(size_t i = (m_pad); i < (m_pad+skip); i++) { - for(int j=0;j - void wavelet_level::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float *filterHi, - int taps, int offset, int pitch, int srclen, int m_w2, int row) { + void wavelet_level::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float *RESTRICT filterHi, + const int taps, const int offset, const int srcwidth, const int dstwidth, const int row) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', * aligning the 'offset' element of the filter with @@ -229,62 +231,125 @@ namespace rtengine { * Output is subsampled by two */ // calculate coefficients - for(int i = 0; i < srclen; i+=2) { - float lo = 0.f, hi = 0.f; - if (LIKELY(i>skip*taps && i void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float * RESTRICT filterHi, - int taps, int offset, int pitch, int srclen, int row) { - - /* Basic convolution code - * Applies an FIR filter 'filter' with filter length 'taps', - * aligning the 'offset' element of the filter with - * the input pixel, and skipping 'skip' pixels between taps - * Output is subsampled by two - */ - - // calculate coefficients - if (LIKELY(row>skip*taps && rowskip*taps && i void wavelet_level::SynthesisFilterSubsampHorizontal (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, int taps, int offset, int dstlen) { +#ifdef __SSE2__ + template void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], + const int taps, const int offset, const int width, const int height, const int row) { + + /* Basic convolution code + * Applies an FIR filter 'filter' with filter length 'taps', + * aligning the 'offset' element of the filter with + * the input pixel, and skipping 'skip' pixels between taps + * Output is subsampled by two + */ + + // calculate coefficients + if (LIKELY(row>skip*taps && row void wavelet_level::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float * RESTRICT filterLo, float * RESTRICT filterHi, + const int taps, const int offset, const int width, const int height, const int row) { + + /* Basic convolution code + * Applies an FIR filter 'filter' with filter length 'taps', + * aligning the 'offset' element of the filter with + * the input pixel, and skipping 'skip' pixels between taps + * Output is subsampled by two + */ + + // calculate coefficients + if (LIKELY(row>skip*taps && row void wavelet_level::SynthesisFilterSubsampHorizontal (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int srcwidth, const int dstwidth, const int height) { /* Basic convolution code * Applies an FIR filter 'filter' with filter length 'taps', @@ -294,78 +359,157 @@ namespace rtengine { */ // calculate coefficients - int srclen = (dstlen==m_w ? m_w2 : m_h2);//length of row/col in src (coarser level) int shift = skip*(taps-offset-1);//align filter with data - for (int k=0; kskip*taps && i<(srclen-skip*taps))) {//bulk - for (int j=begin, l=0; j SSEFUNCTION void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight) + { - template void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, int taps, int offset, int pitch, int dstlen) { - - /* Basic convolution code - * Applies an FIR filter 'filter' with filter length 'taps', - * aligning the 'offset' element of the filter with - * the input pixel, and skipping 'skip' pixels between taps - * Output is subsampled by two - */ + /* Basic convolution code + * Applies an FIR filter 'filter' with filter length 'taps', + * aligning the 'offset' element of the filter with + * the input pixel, and skipping 'skip' pixels between taps + * Output is subsampled by two + */ // calculate coefficients - int srclen = (dstlen==m_w ? m_w2 : m_h2);//length of row/col in src (coarser level) int shift=skip*(taps-offset-1);//align filter with data - - for(size_t i = m_pad; i < (dstlen+m_pad); i++) { + __m128 fourv = _mm_set1_ps(4.f); + for(size_t i = 0; i < dstheight; i++) { int i_src = (i+shift)/2; int begin = (i+shift)%2; //TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip' - if (LIKELY(i>skip*taps && i<(srclen-skip*taps))) {//bulk - for (int k=0; kskip*taps && i<(dstheight-skip*taps))) {//bulk + int k; + for (k=0; k void wavelet_level::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float * RESTRICT filterLo, float * RESTRICT filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight) + { - template template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { + /* Basic convolution code + * Applies an FIR filter 'filter' with filter length 'taps', + * aligning the 'offset' element of the filter with + * the input pixel, and skipping 'skip' pixels between taps + * Output is subsampled by two + */ + + // calculate coefficients + int shift=skip*(taps-offset-1);//align filter with data + + for(size_t i = 0; i < dstheight; i++) { + int i_src = (i+shift)/2; + int begin = (i+shift)%2; + //TODO: this is correct only if skip=1; otherwise, want to work with cosets of length 'skip' + if (LIKELY(i>skip*taps && i<(dstheight-skip*taps))) {//bulk + for (int k=0; k template SSEFUNCTION void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { T tmpLo[m_w] ALIGNED64; T tmpHi[m_w] ALIGNED64; /* filter along rows and columns */ if(subsamp_out) { + float filterVarray[2*taps][4] ALIGNED64; + for(int i=0;i<2*taps;i++) { + for(int j=0;j<4;j++) { + filterVarray[i][j] = filterV[i]; + } + } for(int row=0;row template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { + T tmpLo[m_w] ALIGNED64; + T tmpHi[m_w] ALIGNED64; + /* filter along rows and columns */ + if(subsamp_out) { + for(int row=0;row template SSEFUNCTION void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset) { + + /* filter along rows and columns */ + if (subsamp_out) { + float filterVarray[2*taps][4] ALIGNED64; + for(int i=0;i<2*taps;i++) { + for(int j=0;j<4;j++) { + filterVarray[i][j] = filterV[i]; + } + } + SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); + SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); + SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterVarray, filterVarray+taps, taps, offset, m_w, m_h2, m_h); + } else { + SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2); + SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2); + SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h); + } + } +#else template template void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset) { /* filter along rows and columns */ if (subsamp_out) { - SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w/*dstlen*/); - SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w/*dstlen*/); - SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterV, filterV+taps, taps, offset, m_w/*pitch*/, m_h/*dstlen*/); + SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); + SynthesisFilterSubsampHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); + SynthesisFilterSubsampVertical (tmpLo, tmpHi, dst, filterV, filterV+taps, taps, offset, m_w, m_h2, m_h); } else { - SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w); - SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w); + SynthesisFilterHaarHorizontal (src, wavcoeffs[1], tmpLo, m_w, m_h2); + SynthesisFilterHaarHorizontal (wavcoeffs[2], wavcoeffs[3], tmpHi, m_w, m_h2); SynthesisFilterHaarVertical (tmpLo, tmpHi, dst, m_w, m_h); } } - +#endif }; #endif diff --git a/rtengine/dcp.cc b/rtengine/dcp.cc index b067b2120..685ce6f38 100644 --- a/rtengine/dcp.cc +++ b/rtengine/dcp.cc @@ -287,7 +287,7 @@ void DCPProfile::MakeXYZCAM(ColorTemp &wb, double pre_mul[3], double camWbMatrix } // mix if we have two matrices - double mix; + double mix = 1.0; if ((hasCol1 && hasCol2) || (hasFwd1 && hasFwd2)) { double wbtemp; /* DNG ref way to convert XY to temperature, which affect matrix mixing. A different model here diff --git a/rtengine/dcraw.h b/rtengine/dcraw.h index 3bfda4c1f..804cfc49c 100644 --- a/rtengine/dcraw.h +++ b/rtengine/dcraw.h @@ -48,19 +48,19 @@ public: ,ifname(NULL) ,meta_data(NULL) ,shot_select(0),multi_out(0) + ,float_raw_image(NULL) ,image(NULL) ,bright(1.),threshold(0.) ,half_size(0),four_color_rgb(0),document_mode(0),highlight(0) ,verbose(0) + ,use_auto_wb(0),use_camera_wb(0),use_camera_matrix(1) + ,output_color(1),output_bps(8),output_tiff(0),med_passes(0),no_auto_bright(0) ,RT_whitelevel_from_constant(0) ,RT_blacklevel_from_constant(0) ,RT_matrix_from_constant(0) - ,use_auto_wb(0),use_camera_wb(0),use_camera_matrix(1) - ,output_color(1),output_bps(8),output_tiff(0),med_passes(0),no_auto_bright(0) ,getbithuff(this,ifp,zero_after_ff) ,ph1_bithuff(this,ifp,order) ,pana_bits(ifp,load_flags) - ,float_raw_image(NULL) { memset(&hbd, 0, sizeof(hbd)); aber[0]=aber[1]=aber[2]=aber[3]=1; @@ -106,9 +106,9 @@ protected: int output_color, output_bps, output_tiff, med_passes; int no_auto_bright; unsigned greybox[4] ; - int RT_matrix_from_constant; - int RT_blacklevel_from_constant; int RT_whitelevel_from_constant; + int RT_blacklevel_from_constant; + int RT_matrix_from_constant; float cam_mul[4], pre_mul[4], cmatrix[3][4], rgb_cam[3][4]; diff --git a/rtengine/dcrop.cc b/rtengine/dcrop.cc index 5391a67f3..121b347c9 100644 --- a/rtengine/dcrop.cc +++ b/rtengine/dcrop.cc @@ -23,7 +23,6 @@ #include "refreshmap.h" #include "rt_math.h" #include "colortemp.h" - // "ceil" rounding #define SKIPS(a,b) ((a) / (b) + ((a) % (b) > 0)) @@ -158,14 +157,11 @@ void Crop::update (int todo) { // printf("x=%d y=%d crow=%d croh=%d skip=%d\n",rqcropx, rqcropy, rqcropw, rqcroph, skip); // printf("trafx=%d trafyy=%d trafwsk=%d trafHs=%d \n",trafx, trafy, trafw*skip, trafh*skip); - int skipP=1;//force Skip for noise evaluation - Imagefloat *calclum = NULL;//for Luminance denoise curve NoiseCurve noiseLCurve; NoiseCurve noiseCCurve; float autoNR = (float) settings->nrauto;// float autoNRmax = (float) settings->nrautomax;// - float autohigh = (float) settings->nrhigh;// params.dirpyrDenoise.getCurves(noiseLCurve, noiseCCurve); @@ -186,7 +182,6 @@ void Crop::update (int todo) { parent->ipf.Tile_calc (tilesize, overlap, kall, widIm, heiIm, numtiles_W, numtiles_H, tilewidth, tileheight, tileWskip, tileHskip); kall=0; - int nbtl=numtiles_W*numtiles_H; float *ch_M = new float [9];//allocate memory float *max_r = new float [9]; float *max_b = new float [9]; @@ -212,71 +207,7 @@ void Crop::update (int todo) { parent->imgsrc->getImage (parent->currWB, tr, origCrop, pp, params.toneCurve, params.icm, params.raw ); } } - /* - if(params.dirpyrDenoise.Cmethod=="PON") { - MyTime t1dce,t2dce; - t1dce.set(); - int crW=100;//crop noise width - int crH=100;//crop noise height - Imagefloat *origCropPart;//init auto noise - origCropPart = new Imagefloat (crW, crH);//allocate memory - - int skipP=1; - if (skip==1 && params.dirpyrDenoise.enabled) {//evaluate Noise - - for(int wcr=0;wcrimgsrc->getImage (parent->currWB, tr, origCropPart, ppP, params.toneCurve, params.icm, params.raw ); - - Imagefloat *provi; - Imagefloat *provicalc; - provi = new Imagefloat (crW, crH); - if(origCropPart != provi) - origCropPart->copyData(provi); - provicalc = new Imagefloat (crW, crH); - if(origCropPart != provicalc) - origCropPart->copyData(provicalc); - parent->imgsrc->convertColorSpace(provicalc, params.icm, parent->currWB, params.raw);//for denoise luminance curve - float maxr=0.f; - float maxb=0.f; - - float chaut, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, nresi, highresi, chromina, sigma, lumema, sigma_L; - chaut=0.f;redaut=0.f; blueaut=0.f; maxredaut=0.f; maxblueaut=0.f;chromina=0.f; sigma=0.f; - parent->ipf.RGB_denoise_info(provi, provi, provicalc, parent->imgsrc->isRAW(), params.dirpyrDenoise, params.defringe, parent->imgsrc->getDirPyrDenoiseExpComp(), noiseLCurve,lldenoiseutili, noiseCCurve,ccdenoiseutili, chaut, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, nresi, highresi, chromina, sigma, lumema, sigma_L); - //printf("DCROP skip=%d cha=%f red=%f bl=%f redM=%f bluM=%f chrom=%f sigm=%f lum=%f\n",skip, chaut,redaut,blueaut, maxredaut, maxblueaut, chromina, sigma, lumema); - if(maxredaut > maxblueaut) { - maxr=(maxredaut-chaut)/(autoNRmax/2.f); - if(minblueaut <= minredaut && minblueaut < chaut) maxb=(-chaut+minblueaut)/autoNRmax; - } - else { - maxb=(maxblueaut-chaut)/(autoNRmax/2.f); - if(minredaut <= minblueaut && minredaut < chaut) maxr=(-chaut+minredaut)/autoNRmax; - }//maxb mxr - empirical evaluation red / blue - - ch_M[hcr*numtiles_W + wcr]=chaut/autoNR; - max_r[hcr*numtiles_W + wcr]=maxr; - max_b[hcr*numtiles_W + wcr]=maxb; - - delete provi; - - } - } - - } - delete origCropPart; - if (settings->verbose) { - t2dce.set(); - printf("Info denoise ponderated performed in %d usec:\n", t2dce.etime(t1dce)); - } - - } - */ - if((settings->leveldnautsimpl==1 && params.dirpyrDenoise.Cmethod=="PRE") || (settings->leveldnautsimpl==0 && params.dirpyrDenoise.C2method=="PREV")) { PreviewProps pp (trafx, trafy, trafw*skip, trafh*skip, skip); parent->imgsrc->getImage (parent->currWB, tr, origCrop, pp, params.toneCurve, params.icm, params.raw ); @@ -298,12 +229,12 @@ void Crop::update (int todo) { if(abs(centerTile_Y[cc]-CenterPreview_Y) < minimuY) {minimuY=abs(centerTile_Y[cc]-CenterPreview_Y);poscenterY=cc;} } // printf("TileCX=%d TileCY=%d prevX=%d prevY=%d \n",centerTile_X[poscenterX],centerTile_Y[poscenterY],CenterPreview_X,CenterPreview_Y); - int crW,crH; - if(settings->leveldnv ==0) {crW=100;crH=100;} - if(settings->leveldnv ==1) {crW=250;crH=250;} + int crW; + if(settings->leveldnv ==0) {crW=100;} + if(settings->leveldnv ==1) {crW=250;} // if(settings->leveldnv ==2) {crW=int(tileWskip/2);crH=int((tileWskip/2));}//adapted to scale of preview - if(settings->leveldnv ==2) {crW=int(tileWskip/2);crH=int(tileHskip/2);} - if(settings->leveldnv ==3) {crW=tileWskip-10;crH=tileHskip-10;} + if(settings->leveldnv ==2) {crW=int(tileWskip/2);} + if(settings->leveldnv ==3) {crW=tileWskip-10;} float adjustr=1.f; if (params.icm.working=="ProPhoto") {adjustr =1.f;} @@ -323,9 +254,17 @@ void Crop::update (int todo) { // cropImageListener->setPosition (centerTile_X[poscenterX],centerTile_Y[poscenterY] , true); //setCropSizes (centerTile_X[poscenterX], centerTile_Y[poscenterY], trafw*skip,trafh*skip , skip, true); - Imagefloat *provicalc = new Imagefloat (cropw, croph);//for Luminance denoise curve - origCrop->copyData(provicalc); + // we only need image reduced to 1/4 here + Imagefloat *provicalc = new Imagefloat ((cropw+1)/2, (croph+1)/2);//for denoise curves + for(int ii=0;iir(ii>>1,jj>>1) = origCrop->r(ii,jj); + provicalc->g(ii>>1,jj>>1) = origCrop->g(ii,jj); + provicalc->b(ii>>1,jj>>1) = origCrop->b(ii,jj); + } + } parent->imgsrc->convertColorSpace(provicalc, params.icm, parent->currWB, params.raw);//for denoise luminance curve + float maxr=0.f; float maxb=0.f; float chaut, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, nresi, highresi, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc; @@ -380,7 +319,6 @@ void Crop::update (int todo) { // if(settings->leveldnv ==2) {crW=int(tileWskip/2);crH=int((tileWskip/2));}//adapted to scale of preview if(settings->leveldnv ==2) {crW=int(tileWskip/2);crH=int(tileHskip/2);} if(settings->leveldnv ==3) {crW=tileWskip-10;crH=tileHskip-10;} - float lowdenoise=1.f; int levaut=settings->leveldnaut; if(levaut==1) //Standard @@ -394,10 +332,8 @@ void Crop::update (int todo) { #pragma omp parallel #endif { - Imagefloat *origCropPart;//init auto noise - origCropPart = new Imagefloat (crW, crH);//allocate memory - Imagefloat *provicalc; - provicalc = new Imagefloat (crW, crH); + Imagefloat *origCropPart = new Imagefloat (crW, crH);//allocate memory + Imagefloat *provicalc = new Imagefloat ((crW+1)/2, (crH+1)/2);//for denoise curves int coordW[3];//coordonate of part of image to mesure noise int coordH[3]; @@ -410,18 +346,22 @@ void Crop::update (int todo) { #endif for(int wcr=0;wcr<=2;wcr++) { for(int hcr=0;hcr<=2;hcr++) { - PreviewProps ppP (coordW[wcr] , coordH[hcr], crW, crH, skipP); + PreviewProps ppP (coordW[wcr] , coordH[hcr], crW, crH, 1); parent->imgsrc->getImage (parent->currWB, tr, origCropPart, ppP, params.toneCurve, params.icm, params.raw ); - if(origCropPart != provicalc) - origCropPart->copyData(provicalc); + // we only need image reduced to 1/4 here + for(int ii=0;iir(ii>>1,jj>>1) = origCropPart->r(ii,jj); + provicalc->g(ii>>1,jj>>1) = origCropPart->g(ii,jj); + provicalc->b(ii>>1,jj>>1) = origCropPart->b(ii,jj); + } + } parent->imgsrc->convertColorSpace(provicalc, params.icm, parent->currWB, params.raw);//for denoise luminance curve - //float maxr=0.f; - //float maxb=0.f; + float pondcorrec=1.0f; float chaut=0.f, redaut=0.f, blueaut=0.f, maxredaut=0.f, maxblueaut=0.f, minredaut=0.f, minblueaut=0.f, nresi=0.f, highresi=0.f, chromina=0.f, sigma=0.f, lumema=0.f, sigma_L=0.f, redyel=0.f, skinc=0.f, nsknc=0.f; int nb=0; -// chaut=0.f;redaut=0.f; blueaut=0.f; maxredaut=0.f; maxblueaut=0.f; chromina=0.f; sigma=0.f; parent->ipf.RGB_denoise_info(origCropPart, provicalc, parent->imgsrc->isRAW(), gamcurve, gam, gamthresh, gamslope, params.dirpyrDenoise, parent->imgsrc->getDirPyrDenoiseExpComp(), chaut, nb, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, nresi, highresi, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc); //printf("DCROP skip=%d cha=%f red=%f bl=%f redM=%f bluM=%f chrom=%f sigm=%f lum=%f\n",skip, chaut,redaut,blueaut, maxredaut, maxblueaut, chromina, sigma, lumema); @@ -469,27 +409,22 @@ void Crop::update (int todo) { else if (params.icm.working=="BestRGB") {adjustr =1.f/1.2f;} else if (params.icm.working=="BruceRGB") {adjustr =1.f/1.2f;} - float maxmax; - float minmin; float delta[9]; int mode=1; - float redyel, skinc, nsknc; int lissage=settings->leveldnliss; for (int k=0;k<9;k++) { - maxmax=max(max_r[k],max_b[k]); + float maxmax=max(max_r[k],max_b[k]); parent->ipf.calcautodn_info (ch_M[k], delta[k], Nb[k], levaut, maxmax,lumL[k],chromC[k], mode, lissage, ry[k], sk[k], pcsk[k]); // printf("ch_M=%f delta=%f\n",ch_M[k], delta[k]); } for (int k=0;k<9;k++) { if(max_r[k] > max_b[k]) { - minmin=min(min_r[k],min_b[k]); Max_R[k]=(delta[k])/((autoNRmax*multip*adjustr*lowdenoise)/2.f); Min_B[k]= -(ch_M[k]- min_b[k])/(autoNRmax*multip*adjustr*lowdenoise); Max_B[k]=0.f; Min_R[k]=0.f; } else { - minmin=min(min_r[k],min_b[k]); Max_B[k]=(delta[k])/((autoNRmax*multip*adjustr*lowdenoise)/2.f); Min_R[k]=- (ch_M[k]-min_r[k]) / (autoNRmax*multip*adjustr*lowdenoise); Min_B[k]=0.f; @@ -555,14 +490,20 @@ void Crop::update (int todo) { } else if(denoiseParams.Lmethod == "SLI") noiseLCurve.Reset(); - - if((noiseLCurve || noiseCCurve ) && skip==1 && denoiseParams.enabled) {//only allocate memory if enabled and skip - calclum = new Imagefloat (cropw, croph);//for Luminance denoise curve - origCrop->copyData(calclum); + // we only need image reduced to 1/4 here + calclum = new Imagefloat ((cropw+1)/2, (croph+1)/2);//for denoise curves + for(int ii=0;iir(ii>>1,jj>>1) = origCrop->r(ii,jj); + calclum->g(ii>>1,jj>>1) = origCrop->g(ii,jj); + calclum->b(ii>>1,jj>>1) = origCrop->b(ii,jj); + } + } parent->imgsrc->convertColorSpace(calclum, params.icm, parent->currWB, params.raw);//for denoise luminance curve } + if(skip!=1) if(parent->adnListener) parent->adnListener->noiseChanged(0.f, 0.f); if (todo & M_LINDENOISE) { @@ -643,7 +584,6 @@ void Crop::update (int todo) { baseCrop->b[(int)(xref/skip)][(int)(yref/skip)]/256, parent->imgsrc->getGamma()); }*/ - double rrm, ggm, bbm; float satLimit = float(params.colorToning.satProtectionThreshold)/100.f*0.7f+0.3f; float satLimitOpacity = 1.f-(float(params.colorToning.saturatedOpacity)/100.f); @@ -659,12 +599,13 @@ void Crop::update (int todo) { satLimitOpacity= 100.f*(moyS-0.85f*eqty);//-0.85 sigma==>20% pixels with low saturation } - if (todo & M_RGBCURVE) + if (todo & M_RGBCURVE) { + double rrm, ggm, bbm; parent->ipf.rgbProc (baseCrop, laboCrop, this, parent->hltonecurve, parent->shtonecurve, parent->tonecurve, cshmap, params.toneCurve.saturation, parent->rCurve, parent->gCurve, parent->bCurve, satLimit ,satLimitOpacity, parent->ctColorCurve, parent->ctOpacityCurve, parent->opautili, parent->clToningcurve,parent->cl2Toningcurve, parent->customToneCurve1, parent->customToneCurve2, parent->beforeToneCurveBW, parent->afterToneCurveBW,rrm, ggm, bbm, parent->bwAutoR, parent->bwAutoG, parent->bwAutoB); - + } /*xref=000;yref=000; if (colortest && cropw>115 && croph>115) for(int j=1;j<5;j++){ @@ -744,7 +685,6 @@ void Crop::update (int todo) { if(settings->ciecamfloat) { float d; // not used after this block - skip2=skip; parent->ipf.ciecam_02float (cieCrop, float(adap), begh, endh, 1, 2, labnCrop, ¶ms, parent->customColCurve1, parent->customColCurve2, parent->customColCurve3, dummy, dummy, parent->CAMBrightCurveJ, parent->CAMBrightCurveQ, parent->CAMMean, 5, 1, execsharp, d, skip, 1); } diff --git a/rtengine/dcrop.h b/rtengine/dcrop.h index 23c9ebf8f..6f858779f 100644 --- a/rtengine/dcrop.h +++ b/rtengine/dcrop.h @@ -74,7 +74,6 @@ class Crop : public DetailedCrop, public EditBuffer { public: Crop (ImProcCoordinator* parent, EditDataProvider *editDataProvider, bool isDetailWindow); virtual ~Crop (); - int skip2; void mLock () { cropMutex.lock(); } void mUnlock () { cropMutex.lock(); } diff --git a/rtengine/helpersse2.h b/rtengine/helpersse2.h index 0f0a3b876..cbfcc6691 100644 --- a/rtengine/helpersse2.h +++ b/rtengine/helpersse2.h @@ -29,13 +29,16 @@ typedef __m128i vint2; #if __GNUC__ == 4 && __GNUC_MINOR__ >= 8 #define LVF(x) _mm_load_ps(&x) #define LVFU(x) _mm_loadu_ps(&x) + #define STVF(x,y) _mm_store_ps(&x,y) #else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3 #define LVF(x) _mm_loadu_ps(&x) #define LVFU(x) _mm_loadu_ps(&x) + #define STVF(x,y) _mm_storeu_ps(&x,y) #endif #else -#define LVF(x) _mm_load_ps(&x) -#define LVFU(x) _mm_loadu_ps(&x) + #define LVF(x) _mm_load_ps(&x) + #define LVFU(x) _mm_loadu_ps(&x) + #define STVF(x,y) _mm_store_ps(&x,y) #endif #define LC2VFU(a) _mm_shuffle_ps( LVFU(a), _mm_loadu_ps( (&a) + 4 ), _MM_SHUFFLE( 2,0,2,0 ) ) #define ZEROV _mm_setzero_ps() diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index 82a2dcef0..c805d240e 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -33,13 +33,13 @@ ImProcCoordinator::ImProcCoordinator () : orig_prev(NULL), oprevi(NULL), oprevl(NULL), nprevl(NULL), previmg(NULL), workimg(NULL), ncie(NULL), imgsrc(NULL), shmap(NULL), lastAwbEqual(0.), ipf(¶ms, true), scale(10), highDetailPreprocessComputed(false), highDetailRawComputed(false), allocated(false), - bwAutoR(-9000.f), bwAutoG(-9000.f), bwAutoB(-9000.f), CAMMean(0.), chaut(0.f), redaut(0.f), blueaut(0.f), maxredaut(0.f), maxblueaut(0.f),nresi(0.f), - chromina(0.f), sigma(0.f), lumema(0.f),minredaut(0.f), minblueaut(0.f), + bwAutoR(-9000.f), bwAutoG(-9000.f), bwAutoB(-9000.f), CAMMean(0.), - hltonecurve(65536,LUT_CLIP_BELOW), // used LUT_CLIP_BELOW, because we want to have a baseline of 2^expcomp in this curve. If we don't clip the lut we get wrong values, see Issue 2621 #14 for details - shtonecurve(65536,2),//clip above + hltonecurve(65536), + shtonecurve(65536), tonecurve(65536,0),//,1); - + chaut(0.f), redaut(0.f), blueaut(0.f), maxredaut(0.f), maxblueaut(0.f),minredaut(0.f), minblueaut(0.f),nresi(0.f), + chromina(0.f), sigma(0.f), lumema(0.f), lumacurve(65536,0), chroma_acurve(65536,0), chroma_bcurve(65536,0), @@ -82,11 +82,11 @@ ImProcCoordinator::ImProcCoordinator () rcurvehist(256), rcurvehistCropped(256), rbeforehist(256), gcurvehist(256), gcurvehistCropped(256), gbeforehist(256), bcurvehist(256), bcurvehistCropped(256), bbeforehist(256), - + fullw(1),fullh(1), pW(-1), pH(-1), - plistener(NULL), imageListener(NULL), aeListener(NULL), hListener(NULL),acListener(NULL), abwListener(NULL),actListener(NULL),adnListener(NULL), - resultValid(false), changeSinceLast(0), updaterRunning(false), destroying(false),utili(false),autili(false),opautili(false), - butili(false),ccutili(false),cclutili(false),clcutili(false),fullw(1),fullh(1) + plistener(NULL), imageListener(NULL), aeListener(NULL), acListener(NULL),abwListener(NULL),actListener(NULL),adnListener(NULL), hListener(NULL), + resultValid(false), changeSinceLast(0), updaterRunning(false), destroying(false),utili(false),autili(false), + butili(false),ccutili(false),cclutili(false),clcutili(false),opautili(false) {} @@ -291,10 +291,15 @@ void ImProcCoordinator::updatePreviewImage (int todo, Crop* cropCall) { if((noiseLCurve || noiseCCurve) && denoiseParams.enabled && (scale==1)){//only allocate memory if enabled and scale=1 - calclum = new Imagefloat (pW, pH);//for Luminance denoise curve - if(orig_prev != calclum) - orig_prev->copyData(calclum); - + // we only need image reduced to 1/4 here + calclum = new Imagefloat ((pW+1)/2, (pH+1)/2);//for luminance denoise curve + for(int ii=0;iir(ii>>1,jj>>1) = orig_prev->r(ii,jj); + calclum->g(ii>>1,jj>>1) = orig_prev->g(ii,jj); + calclum->b(ii>>1,jj>>1) = orig_prev->b(ii,jj); + } + } imgsrc->convertColorSpace(calclum, params.icm, currWB, params.raw);//claculate values after colorspace conversion } //always enabled to calculated auto Chroma diff --git a/rtengine/improccoordinator.h b/rtengine/improccoordinator.h index 958df0277..9a67323dc 100644 --- a/rtengine/improccoordinator.h +++ b/rtengine/improccoordinator.h @@ -114,13 +114,6 @@ class ImProcCoordinator : public StagedImageProcessor { LUTf rCurve; LUTf gCurve; LUTf bCurve; - bool utili; - bool autili; - bool butili; - bool ccutili; - bool cclutili; - bool clcutili; - bool opautili; ToneCurve customToneCurve1; ToneCurve customToneCurve2; ColorGradientCurve ctColorCurve; @@ -176,6 +169,13 @@ class ImProcCoordinator : public StagedImageProcessor { bool updaterRunning; ProcParams nextParams; bool destroying; + bool utili; + bool autili; + bool butili; + bool ccutili; + bool cclutili; + bool clcutili; + bool opautili; void startProcessing (); void process (); diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 5fda98920..c51f860d6 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -35,7 +35,6 @@ #include "iccmatrices.h" #include "color.h" #include "calc_distort.h" -#include "boxblur.h" #include "rt_math.h" #include "EdgePreservingDecomposition.h" #include "improccoordinator.h" diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 454838215..745c41dae 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -174,7 +174,6 @@ class ImProcFunctions { bool iGamma; // true if inverse gamma has to be applied in rgbProc double g; static LUTf cachef; - float Qhigh; double lumimul[3]; // float chau; // float chred; @@ -284,13 +283,13 @@ class ImProcFunctions { // void WaveletDenoiseAll(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, // wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab, wavelet_decomposition &wch, NoiImage * noi ); void WaveletDenoiseAll(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float **noisevarlum, float **noisevarchrom, int width, int height, float *mad_LL, float *mad_aa, float *mad_bb, float noisevar_abr, float noisevar_abb, LabImage * noi, const NoiseCurve & dnNoisCurve, const NoiseCurve & dnNoisCCcurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, int schoice, bool autoch, bool perf); + wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf); void WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float *mad_aa, float *mad_bb, float noisevar_abr, float noisevar_abb, LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut,float &minredaut, float & minblueaut, int schoice, bool autoch, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, + wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut,float &minredaut, float & minblueaut, int schoice, bool autoch, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, float &maxchred, float &maxchblue,float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool perf, bool multiThread); void WaveletDenoiseAll_BiShrink(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float **noisevarlum, float **noisevarchrom, int width, int height, float *mad_LL, float *mad_aa, float *mad_bb, float noisevar_abr, float noisevar_abb, LabImage * noi, const NoiseCurve & dnNoisCurve, const NoiseCurve & dnNoisCCcurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, int schoice, bool autoch, bool perf); + wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf); //void BiShrink(float * ReCoeffs, float * ImCoeffs, float * ReParents, float * ImParents, // int W, int H, int level, int padding, float noisevar); //void Shrink(float ** WavCoeffs, int W, int H, int level, float noisevar); @@ -298,11 +297,11 @@ class ImProcFunctions { // int W_L, int H_L, int W_ab, int H_ab, int W_h, int H_h, int skip_L, int skip_ab, int skip_h, float noisevar_L, float noisevar_ab, float **WavCoeffs_h, LabImage * noi); void ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, float **buffer, int level, - int W_L, int H_L, int W_ab, int H_ab, int skip_L, int skip_ab, float noisevar_L, float **noisevarlum, float **noisevarchrom, int width, int height, float *mad_LL, float *mad_aa, float *mad_bb, float noisevar_abr, float noisevar_abb,LabImage * noi, const NoiseCurve & dnNoisCurve, const NoiseCurve & dnNoisCCcurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, int callby, bool autoch, bool perf, float * madaa = NULL, float * madab = NULL, float * madaL = NULL, bool madCalculated= false); + int W_L, int H_L, int W_ab, int H_ab, int skip_L, int skip_ab, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb,LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf, float * madaa = NULL, float * madab = NULL, float * madaL = NULL, bool madCalculated= false); void ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, int level, - int W_ab, int H_ab, int skip_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float *mad_aa, float *mad_bb, float noisevar_abr, float noisevar_abb,LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, bool autoch, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, + int W_ab, int H_ab, int skip_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float noisevar_abr, float noisevar_abb,LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, bool autoch, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool perf, bool multiThread); - void Noise_residual(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, wavelet_decomposition &WaveletCoeffs_b, int width, int height, float &chresid, float &chmaxredresid,float &chmaxblueresid, float &chresidred, float & chresidblue, float resid, float residblue, float residred, float maxredresid, float maxblueresid, float nbresid); + void Noise_residual(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, wavelet_decomposition &WaveletCoeffs_b, int width, int height, float &chresid, float &chmaxredresid,float &chmaxblueresid, float &chresidred, float & chresidblue, float resid, float residblue, float residred, float maxredresid, float maxblueresid, float nbresid, bool denoiseMethodRgb); void calcautodn_info (float &chaut, float &delta, int Nb, int levaut, float maxmax, float lumema, float chromina, int mode, int lissage, float redyel, float skinc, float nsknc); float MadMax(float * HH_Coeffs, int &max, int datalen); float Mad(float * DataList, const int datalen); diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 53276bd3e..75f4ea147 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -31,7 +31,6 @@ #include "../rtgui/ppversion.h" #include "../rtgui/multilangmgr.h" #include "mytime.h" -#include "StopWatch.h" #undef THREAD_PRIORITY_NORMAL #ifdef _OPENMP #include @@ -132,7 +131,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p params.dirpyrDenoise.getCurves(noiseLCurve, noiseCCurve); float autoNR = (float) settings->nrauto;// float autoNRmax = (float) settings->nrautomax;// - float autohigh = (float) settings->nrhigh;// int tilesize; int overlap; if(settings->leveldnti ==0) { @@ -162,9 +160,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p float *pcsk = new float [nbtl]; float *Max_R_ =new float [nbtl]; float *Max_B_ = new float [nbtl]; - float *Min_R_ =new float [nbtl]; - float *Min_B_ = new float [nbtl]; - float *delta_ = new float [nbtl]; // printf("expert=%d\n",settings->leveldnautsimpl); if(settings->leveldnautsimpl==1 && params.dirpyrDenoise.Cmethod=="PON") { MyTime t1pone,t2pone; @@ -185,7 +180,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p // int crH=tileHskip-10;//crop noise height // Imagefloat *origCropPart;//init auto noise // origCropPart = new Imagefloat (crW, crH);//allocate memory - StopWatch Stop1("denoise info tiled"); if (params.dirpyrDenoise.enabled) {//evaluate Noise LUTf gamcurve(65536,0); float gam, gamthresh, gamslope; @@ -194,18 +188,24 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p { Imagefloat *origCropPart;//init auto noise origCropPart = new Imagefloat (crW, crH);//allocate memory - Imagefloat *provicalc; - provicalc = new Imagefloat (crW, crH); + Imagefloat *provicalc = new Imagefloat ((crW+1)/2, (crH+1)/2);//for denoise curves int skipP=1; - #pragma omp for schedule(dynamic) collapse(2) nowait + #pragma omp for schedule(dynamic) collapse(2) nowait for(int wcr=0;wcrgetImage (currWB, tr, origCropPart, ppP, params.toneCurve, params.icm, params.raw ); - if(origCropPart != provicalc) - origCropPart->copyData(provicalc); + + // we only need image reduced to 1/4 here + for(int ii=0;iir(ii>>1,jj>>1) = origCropPart->r(ii,jj); + provicalc->g(ii>>1,jj>>1) = origCropPart->g(ii,jj); + provicalc->b(ii>>1,jj>>1) = origCropPart->b(ii,jj); + } + } imgsrc->convertColorSpace(provicalc, params.icm, currWB, params.raw);//for denoise luminance curve float maxr=0.f; float maxb=0.f; @@ -279,8 +279,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p if(liss==3){//same as auto but with much cells float MaxR=0.f; float MaxB=0.f; - float MinR=100000000000.f; - float MinB=100000000000.f; float MaxRMoy=0.f; float MaxBMoy=0.f; @@ -316,7 +314,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p } } - Stop1.stop(); } @@ -350,15 +347,21 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p { Imagefloat *origCropPart;//init auto noise origCropPart = new Imagefloat (crW, crH);//allocate memory - Imagefloat *provicalc; - provicalc = new Imagefloat (crW, crH); + Imagefloat *provicalc = new Imagefloat ((crW+1)/2, (crH+1)/2);//for denoise curves #pragma omp for schedule(dynamic) collapse(2) nowait for(int wcr=0;wcr<=2;wcr++) { for(int hcr=0;hcr<=2;hcr++) { PreviewProps ppP (coordW[wcr] , coordH[hcr], crW, crH, 1); imgsrc->getImage (currWB, tr, origCropPart, ppP, params.toneCurve, params.icm, params.raw); - origCropPart->copyData(provicalc); + // we only need image reduced to 1/4 here + for(int ii=0;iir(ii>>1,jj>>1) = origCropPart->r(ii,jj); + provicalc->g(ii>>1,jj>>1) = origCropPart->g(ii,jj); + provicalc->b(ii>>1,jj>>1) = origCropPart->b(ii,jj); + } + } imgsrc->convertColorSpace(provicalc, params.icm, currWB, params.raw);//for denoise luminance curve int nb = 0; float chaut=0.f, redaut=0.f, blueaut=0.f, maxredaut=0.f, maxblueaut=0.f, minredaut=0.f, minblueaut=0.f, nresi=0.f, highresi=0.f, chromina=0.f, sigma=0.f, lumema=0.f, sigma_L=0.f, redyel=0.f, skinc=0.f, nsknc=0.f; @@ -407,12 +410,11 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p if(!imgsrc->isRAW()) multip=2.f;//take into account gamma for TIF / JPG approximate value...not good fot gamma=1 - float maxmax; float delta[9]; int mode=1; int lissage=settings->leveldnliss; for(int k=0;k<9;k++) { - maxmax = max(max_r[k],max_b[k]); + float maxmax = max(max_r[k],max_b[k]); ipf.calcautodn_info (ch_M[k], delta[k], Nb[k], levaut, maxmax, lumL[k], chromC[k], mode, lissage, ry[k], sk[k], pcsk[k] ); // printf("ch_M=%f delta=%f\n",ch_M[k], delta[k]); } @@ -527,19 +529,24 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p denoiseParams.luma = 0.0f; } else if(denoiseParams.Lmethod=="SLI") noiseLCurve.Reset(); - if (denoiseParams.enabled && (noiseLCurve || noiseCCurve )) { - - calclum = new Imagefloat (fw, fh);//for luminance denoise curve - if(baseImg != calclum) - baseImg->copyData(calclum); - imgsrc->convertColorSpace(calclum, params.icm, currWB, params.raw); + // we only need image reduced to 1/4 here + calclum = new Imagefloat ((fw+1)/2, (fh+1)/2);//for luminance denoise curve +#pragma omp parallel for + for(int ii=0;iir(ii>>1,jj>>1) = baseImg->r(ii,jj); + calclum->g(ii>>1,jj>>1) = baseImg->g(ii,jj); + calclum->b(ii>>1,jj>>1) = baseImg->b(ii,jj); + } + } + imgsrc->convertColorSpace(calclum, params.icm, currWB, params.raw); } if (denoiseParams.enabled) { // CurveFactory::denoiseLL(lldenoiseutili, denoiseParams.lcurve, Noisecurve,1); //denoiseParams.getCurves(noiseLCurve); // ipf.RGB_denoise(baseImg, baseImg, calclum, imgsrc->isRAW(), denoiseParams, params.defringe, imgsrc->getDirPyrDenoiseExpComp(), noiseLCurve, lldenoiseutili); - float chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc; + float chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi; int kall=2; ipf.RGB_denoise(kall, baseImg, baseImg, calclum, ch_M, max_r, max_b, imgsrc->isRAW(), denoiseParams, imgsrc->getDirPyrDenoiseExpComp(), noiseLCurve, noiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, nresi, highresi); @@ -557,9 +564,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p delete [] pcsk; delete [] Max_R_; delete [] Max_B_; - delete [] Min_R_; - delete [] Min_B_; - delete [] delta_; imgsrc->convertColorSpace(baseImg, params.icm, currWB, params.raw); @@ -589,8 +593,8 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p } // RGB processing - LUTf curve1 (65536,LUT_CLIP_BELOW); // used LUT_CLIP_BELOW, because we want to have a baseline of 2^expcomp in this curve. If we don't clip the lut we get wrong values, see Issue 2621 #14 for details - LUTf curve2 (65536,0); + LUTf curve1 (65536); + LUTf curve2 (65536); LUTf curve (65536,0); LUTf satcurve (65536,0); LUTf lhskcurve (65536,0); @@ -792,8 +796,8 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p customColCurve2, customColCurve3, 1); - double adap; if(params.colorappearance.enabled){ + double adap; float fnum = imgsrc->getMetaData()->getFNumber ();// F number float fiso = imgsrc->getMetaData()->getISOSpeed () ;// ISO float fspeed = imgsrc->getMetaData()->getShutterSpeed () ;//speed @@ -819,7 +823,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p else ipf.ciecam_02 (cieView, adap, begh, endh,1,2, labView, ¶ms,customColCurve1,customColCurve2,customColCurve3, dummy, dummy, CAMBrightCurveJ, CAMBrightCurveQ, CAMMean, 5, 1, true, dd, 1, 1); } else { - int f_h=2,f_w=2; float d; double dd; @@ -852,7 +855,7 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p Image16* readyImg = NULL; cmsHPROFILE jprof = NULL; bool customGamma = false; - bool useLCMS; + bool useLCMS = false; if(params.icm.gamma != "default" || params.icm.freegamma) { // if select gamma output between BT709, sRGB, linear, low, high, 2.2 , 1.8 cmsMLU *DescriptionMLU, *CopyrightMLU, *DmndMLU, *DmddMLU;// for modification TAG @@ -860,15 +863,6 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p cmsToneCurve* GammaTRC[3] = { NULL, NULL, NULL }; cmsFloat64Number Parameters[7]; double ga0,ga1,ga2,ga3,ga4,ga5,ga6; - // wchar_t string[80] ; - int ns;//numero of stri[] - if (params.icm.working=="ProPhoto") ns=0; - else if (params.icm.working=="Adobe RGB") ns=1; - else if (params.icm.working=="sRGB") ns=2; - else if (params.icm.working=="WideGamut") ns=3; - else if (params.icm.working=="Beta RGB") ns=4; - else if (params.icm.working=="BestRGB") ns=5; - else if (params.icm.working=="BruceRGB") ns=6; // if(params.blackwhite.enabled) params.toneCurve.hrenabled=false; readyImg = ipf.lab2rgb16b (labView, cx, cy, cw, ch, params.icm.output, params.icm.working, params.icm.gamma, params.icm.freegamma, params.icm.gampos, params.icm.slpos, ga0,ga1,ga2,ga3,ga4,ga5,ga6, params.blackwhite.enabled ); customGamma = true; diff --git a/rtengine/sleef.c b/rtengine/sleef.c index fa3eb22ea..c7b3fb486 100755 --- a/rtengine/sleef.c +++ b/rtengine/sleef.c @@ -1212,26 +1212,40 @@ __inline float xexpf(float d) { } - __inline float xmul2f(float d) { - if (*(int*)&d & 0x7FFFFFFF) { // if f==0 do nothing - *(int*)&d += 1 << 23; // add 1 to the exponent - } - return d; + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval += 1 << 23; // add 1 to the exponent + } + return uflint.floatval; } __inline float xdiv2f(float d) { - if (*(int*)&d & 0x7FFFFFFF) { // if f==0 do nothing - *(int*)&d -= 1 << 23; // sub 1 from the exponent + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval -= 1 << 23; // sub 1 from the exponent } - return d; + return uflint.floatval; } __inline float xdivf( float d, int n){ - if (*(int*)&d & 0x7FFFFFFF) { // if f==0 do nothing - *(int*)&d -= n << 23; // add n to the exponent + union { + float floatval; + int intval; + } uflint; + uflint.floatval = d; + if (uflint.intval & 0x7FFFFFFF) { // if f==0 do nothing + uflint.intval -= n << 23; // add n to the exponent } - return d; + return uflint.floatval; } diff --git a/rtgui/options.h b/rtgui/options.h index 52422eec2..c32f67730 100644 --- a/rtgui/options.h +++ b/rtgui/options.h @@ -49,7 +49,7 @@ class SaveFormat { int tiffBits; bool tiffUncompressed; bool saveParams; - SaveFormat () : format("jpg"), jpegQuality(90), jpegSubSamp(2), pngCompression(6), pngBits(8), tiffBits(8), tiffUncompressed(true), saveParams(true) {}; + SaveFormat () : format("jpg"), pngBits(8), pngCompression(6), jpegQuality(90), jpegSubSamp(2), tiffBits(8), tiffUncompressed(true), saveParams(true) {}; }; enum ThFileType {FT_Invalid=-1, FT_None=0, FT_Raw=1, FT_Jpeg=2, FT_Tiff=3, FT_Png=4, FT_Custom=5, FT_Tiff16=6, FT_Png16=7, FT_Custom16=8};