diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index 0b4b90a31..5a21df499 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -34,7 +34,7 @@ #include "rt_math.h" #include "mytime.h" #include "sleef.c" -#include "opthelper.h" +#include "opthelper.h" #include "cplx_wavelet_dec.h" #ifdef _OPENMP @@ -47,17 +47,6 @@ #define blkrad 1 // radius of block averaging #define epsilon 0.001f/(TS*TS) //tolerance -#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } - -#define med3(a0,a1,a2,a3,a4,a5,a6,a7,a8,median) { \ -pp[0]=a0; pp[1]=a1; pp[2]=a2; pp[3]=a3; pp[4]=a4; pp[5]=a5; pp[6]=a6; pp[7]=a7; pp[8]=a8; \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[1]); PIX_SORT(pp[3],pp[4]); PIX_SORT(pp[6],pp[7]); \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[3]); PIX_SORT(pp[5],pp[8]); PIX_SORT(pp[4],pp[7]); \ -PIX_SORT(pp[3],pp[6]); PIX_SORT(pp[1],pp[4]); PIX_SORT(pp[2],pp[5]); \ -PIX_SORT(pp[4],pp[7]); PIX_SORT(pp[4],pp[2]); PIX_SORT(pp[6],pp[4]); \ -PIX_SORT(pp[4],pp[2]); median=pp[4];} //pp4 = median #define med2(a0,a1,a2,a3,a4,median) { \ pp[0]=a0; pp[1]=a1; pp[2]=a2; pp[3]=a3; pp[4]=a4; \ @@ -105,7 +94,6 @@ median=pp[12];} #define ELEM_FLOAT_SWAP(a,b) { register float t=(a);(a)=(b);(b)=t; } - namespace rtengine { // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -124,14 +112,14 @@ namespace rtengine { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - + + extern const Settings* settings; // Median calculation using quicksort float fq_sort2(float arr[], int n) { - int low = 0; + int low = 0; int high = n-1; int median = (low + high) / 2; @@ -192,6 +180,156 @@ float media(float *elements, int N) return elements[N >> 1]; } +void ImProcFunctions::Median_Denoise( float **src, float **dst, const int width, const int height, const mediantype medianType, const int iterations, const int numThreads, float **buffer) +{ + int border=1, numElements, middleElement; + switch(medianType) { + case MED_3X3SOFT: + case MED_3X3STRONG: + border = 1; + break; + case MED_5X5SOFT: + border = 2; + break; + case MED_5X5STRONG: + border = 2; + break; + case MED_7X7: + border = 3; + numElements = 49; + middleElement = 24; + break; + default: // includes MED_9X9 + border = 4; + numElements = 81; + middleElement = 40; + } + + float **allocBuffer = NULL; + float **medBuffer[2]; + medBuffer[0] = src; + // we need a buffer if src == dst or if (src != dst && iterations > 1) + if(src == dst || (src != dst && iterations>1)) { + if(buffer == NULL) { // we didn't get a buufer => create one + allocBuffer = new float*[height]; + for (int i=0; i use it + medBuffer[1] = buffer; + } + } else { // we can write directly into destination + medBuffer[1] = dst; + } + + float ** medianIn, ** medianOut; + int BufferIndex = 0; + for(int iteration=1;iteration<=iterations;iteration++){ + medianIn = medBuffer[BufferIndex]; + medianOut = medBuffer[BufferIndex^1]; + + if(iteration == 1) { // upper border + for (int i=0; i1) +#endif + for (int i=border; i1) +#endif + for(int i = border; i < height-border; i++ ) { + for(int j = border; j < width-border; j++) { + dst[i][j] = medianOut[i][j]; + } + } + } + if(allocBuffer != NULL) { // we allocated memory, so let's free it now + for (int i=0; inrhigh : 1.0f; const bool useNoiseCCurve = (noiseCCurve && noiseCCurve.getSum() > 5.f ); - const bool useNoiseLCurve = (noiseLCurve && noiseLCurve.getSum() >= 7.f ); - - int hei; + const bool useNoiseLCurve = (noiseLCurve && noiseLCurve.getSum() >= 7.f ); + float** lumcalc; - float** ccalc; - + float* lumcalcBuffer; + float** ccalc; + float* ccalcBuffer; + bool ponder=false; float ponderCC=1.f; if(settings->leveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PON") {ponder=true;ponderCC=0.5f;} if(settings->leveldnautsimpl==1 && params->dirpyrDenoise.Cmethod=="PRE") {ponderCC=0.5f;} if(settings->leveldnautsimpl==0 && params->dirpyrDenoise.Cmethod=="PREV") {ponderCC=0.5f;} - - const bool denoiseMethodRgb = (dnparams.dmethod == "RGB"); - + + int metchoice=0; + if(dnparams.methodmed=="Lonly") metchoice=1; + else if(dnparams.methodmed=="Lab") metchoice=2; + else if(dnparams.methodmed=="ab") metchoice=3; + else if(dnparams.methodmed=="Lpab") metchoice=4; + + const bool denoiseMethodRgb = (dnparams.dmethod == "RGB"); // init luma noisevarL - const float noiseluma=(float) dnparams.luma; + const float noiseluma=(float) dnparams.luma; const float noisevarL = (useNoiseLCurve && (denoiseMethodRgb || !isRAW)) ? (float) (SQR(((noiseluma+1.0)/125.0)*(10.+ (noiseluma+1.0)/25.0))) : (float) (SQR((noiseluma/125.0)*(1.0+ noiseluma/25.0))); - + if(useNoiseLCurve || useNoiseCCurve) { - hei=calclum->height; + int hei=calclum->height; int wid=calclum->width; TMatrix wprofi = iccStore->workingSpaceMatrix (params->icm.working); @@ -284,19 +428,21 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef {static_cast(wprofi[0][0]),static_cast(wprofi[0][1]),static_cast(wprofi[0][2])}, {static_cast(wprofi[1][0]),static_cast(wprofi[1][1]),static_cast(wprofi[1][2])}, {static_cast(wprofi[2][0]),static_cast(wprofi[2][1]),static_cast(wprofi[2][2])} - }; + }; + lumcalcBuffer = new float[hei*wid]; lumcalc = new float*[(hei)]; - for (int i=0; i<(hei); i++) - lumcalc[i] = new float[(wid)]; + for (int i=0; i32768.f) + if(LLum>32768.f) LLum = 32768.f; // not strictly necessary float kinterm = epsi + noiseLCurve[xdivf(LLum,15)*500.f]; - kinterm *= 100.f; - kinterm += noiseluma; - lumcalc[ii][jj] = SQR((kinterm/125.f)*(1.f+kinterm/25.f)); - } - if(useNoiseCCurve) { - float cN = sqrtf(SQR(AAum)+SQR(BBum)); - if(cN > 100) - ccalc[ii][jj] = SQR(1.f + ponderCC*(4.f*noiseCCurve[cN / 60.f])); - else - ccalc[ii][jj] = cn100Precalc; + kinterm *= 100.f; + kinterm += noiseluma; + lumcalc[ii][jj] = SQR((kinterm/125.f)*(1.f+kinterm/25.f)); + } + if(useNoiseCCurve) { + float cN = sqrtf(SQR(AAum)+SQR(BBum)); + if(cN > 100) + ccalc[ii][jj] = SQR(1.f + ponderCC*(4.f*noiseCCurve[cN / 60.f])); + else + ccalc[ii][jj] = cn100Precalc; } } - } - - delete calclum; + } + delete calclum; calclum = NULL; } - - const short int imheight=src->height, imwidth=src->width; + + const short int imheight=src->height, imwidth=src->width; if (dnparams.luma!=0 || dnparams.chroma!=0 || dnparams.methodmed=="Lab" || dnparams.methodmed=="Lonly" ) { // gamma transform for input data float gam = dnparams.gamma; float gamthresh = 0.001f; if(!isRAW) {//reduce gamma under 1 for Lab mode ==> TIF and JPG - if(gam < 1.9f) + if(gam < 1.9f) gam = 1.f - (1.9f-gam)/3.f;//minimum gamma 0.7 - else if (gam >= 1.9f && gam <= 3.f) + else if (gam >= 1.9f && gam <= 3.f) gam = (1.4f/1.1f)*gam - 1.41818f; } float gamslope = exp(log((double)gamthresh)/gam)/gamthresh; @@ -377,37 +522,34 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagef const float gain = pow (2.0f, float(expcomp)); float noisevar_Ldetail = SQR((float)(SQR(100.-dnparams.Ldetail) + 50.*(100.-dnparams.Ldetail)) * TS * 0.5f); - - if(settings->verbose) + + if(settings->verbose) printf("Denoise Lab=%i\n",settings->denoiselabgamma); - - // To avoid branches in loops we access the gammatabs by pointers + + // To avoid branches in loops we access the gammatabs by pointers // modify arbitrary data for Lab..I have test : nothing, gamma 2.6 11 - gamma 4 5 - gamma 5.5 10 // we can put other as gamma g=2.6 slope=11, etc. // but noting to do with real gamma !!!: it's only for data Lab # data RGB // finally I opted fot gamma55 and with options we can change - - LUTf *denoisegamtab; - LUTf *denoiseigamtab; - switch(settings->denoiselabgamma) { - case 0: denoisegamtab = &(Color::gammatab_26_11); - denoiseigamtab = &(Color::igammatab_26_11); - break; - case 1: denoisegamtab = &(Color::gammatab_4); - denoiseigamtab = &(Color::igammatab_4); - break; - default: denoisegamtab = &(Color::gammatab_55); - denoiseigamtab = &(Color::igammatab_55); - break; - } - + + LUTf *denoisegamtab; + LUTf *denoiseigamtab; + switch(settings->denoiselabgamma) { + case 0: denoisegamtab = &(Color::gammatab_26_11); + denoiseigamtab = &(Color::igammatab_26_11); + break; + case 1: denoisegamtab = &(Color::gammatab_4); + denoiseigamtab = &(Color::igammatab_4); + break; + default: denoisegamtab = &(Color::gammatab_55); + denoiseigamtab = &(Color::igammatab_55); + break; + } + array2D tilemask_in(TS,TS); array2D tilemask_out(TS,TS); const int border = MAX(2,TS/16); -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int i=0; iTS/2 ? i-TS+1 : i)); float vmask = (i1data[n] = 0; - int tilesize; int overlap; if(settings->leveldnti ==0) { @@ -438,10 +572,31 @@ if(settings->leveldnti ==1) { tilesize = 768; overlap = 96; } + int numTries = 0; + if(ponder) + printf("Tiled denoise processing caused by Automatic Multizone mode\n"); + bool memoryAllocationFailed = false; +do { + numTries++; + if(numTries == 2) + printf("1st denoise pass failed due to insufficient memory, starting 2nd (tiled) pass now...\n"); int numtiles_W, numtiles_H, tilewidth, tileheight, tileWskip, tileHskip; - Tile_calc (tilesize, overlap, 2, imwidth, imheight, numtiles_W, numtiles_H, tilewidth, tileheight, tileWskip, tileHskip); + Tile_calc (tilesize, overlap, (options.rgbDenoiseThreadLimit == 0 && !ponder) ? (numTries == 1 ? 0 : 2) : 2, imwidth, imheight, numtiles_W, numtiles_H, tilewidth, tileheight, tileWskip, tileHskip); + memoryAllocationFailed = false; + const int numtiles = numtiles_W * numtiles_H; + + //output buffer + Imagefloat * dsttmp; + if(numtiles == 1) + dsttmp = dst; + else { + dsttmp = new Imagefloat(imwidth,imheight); + for (int n=0; n<3*imwidth*imheight; n++) + dsttmp->data[n] = 0; + } + //now we have tile dimensions, overlaps //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -469,42 +624,41 @@ if(settings->leveldnti ==1) { fftwf_plan plan_backward_blox[2]; // Creating the plans with FFTW_MEASURE instead of FFTW_ESTIMATE speeds up the execute a bit - plan_forward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE ); - plan_backward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE ); - plan_forward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE ); - plan_backward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE ); + plan_forward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT ); + plan_backward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT ); + plan_forward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT ); + plan_backward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE || FFTW_DESTROY_INPUT ); fftwf_free ( Lbloxtmp ); fftwf_free ( fLbloxtmp ); - -#ifndef _OPENMP + +#ifndef _OPENMP int numthreads = 1; #else // Calculate number of tiles. If less than omp_get_max_threads(), then limit num_threads to number of tiles - int numtiles = numtiles_W * numtiles_H; int numthreads = MIN(numtiles,omp_get_max_threads()); - if(options.rgbDenoiseThreadLimit > 0) + if(options.rgbDenoiseThreadLimit > 0) numthreads = MIN(numthreads,options.rgbDenoiseThreadLimit); - denoiseNestedLevels = omp_get_max_threads() / numthreads; - bool oldNested = omp_get_nested(); - if(denoiseNestedLevels < 2) - denoiseNestedLevels = 1; - else - omp_set_nested(true); - if(options.rgbDenoiseThreadLimit > 0) - while(denoiseNestedLevels*numthreads > options.rgbDenoiseThreadLimit) - denoiseNestedLevels--; - if(settings->verbose) - printf("RGB_denoise uses %d main thread(s) and up to %d nested thread(s) for each main thread\n",numthreads,denoiseNestedLevels); -#endif - - float *LbloxArray[denoiseNestedLevels*numthreads]; - float *fLbloxArray[denoiseNestedLevels*numthreads]; - - for(int i=0;i 0) + while(denoiseNestedLevels*numthreads > options.rgbDenoiseThreadLimit) + denoiseNestedLevels--; + if(settings->verbose) + printf("RGB_denoise uses %d main thread(s) and up to %d nested thread(s) for each main thread\n",numthreads,denoiseNestedLevels); +#endif + float *LbloxArray[denoiseNestedLevels*numthreads]; + float *fLbloxArray[denoiseNestedLevels*numthreads]; + + if(numtiles > 1) + for(int i=0;iworkingSpaceInverseMatrix (params->icm.working); //inverse matrix user select const float wip[3][3] = { @@ -512,7 +666,7 @@ if(settings->leveldnti ==1) { {static_cast(wiprof[1][0]),static_cast(wiprof[1][1]),static_cast(wiprof[1][2])}, {static_cast(wiprof[2][0]),static_cast(wiprof[2][1]),static_cast(wiprof[2][2])} }; - + TMatrix wprof = iccStore->workingSpaceMatrix (params->icm.working); const float wp[3][3] = { @@ -520,23 +674,24 @@ if(settings->leveldnti ==1) { {static_cast(wprof[1][0]),static_cast(wprof[1][1]),static_cast(wprof[1][2])}, {static_cast(wprof[2][0]),static_cast(wprof[2][1]),static_cast(wprof[2][2])} }; - - + + + // begin tile processing of image #ifdef _OPENMP -#pragma omp parallel num_threads(numthreads) +#pragma omp parallel num_threads(numthreads) if(numthreads>1) #endif { - float resid=0.f; - float nbresid=0.f; - float maxredresid=0.f; - float maxblueresid=0.f; - float residred=0.f; - float residblue=0.f; - - int pos; - float* noisevarlum = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; - float* noisevarchrom = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; - + int pos; + float* noisevarlum; + float* noisevarchrom; + if(numtiles == 1 && isRAW) { + noisevarlum = lumcalcBuffer; + noisevarchrom = ccalcBuffer; + } else { + noisevarlum = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; + noisevarchrom = new float[((tileheight+1)/2)*((tilewidth+1)/2)]; + } + #ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -560,106 +715,104 @@ if(settings->leveldnti ==1) { realred = interm_med + intermred; if (realred < 0.f) realred=0.001f; realblue = interm_med + intermblue; if (realblue < 0.f) realblue=0.001f; const float noisevarab_r = SQR(realred); - const float noisevarab_b = SQR(realblue); - // printf("Ch=%f red=%f blu=%f\n",interm_med, intermred,intermblue ); + const float noisevarab_b = SQR(realblue); - - //pixel weight - array2D totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks - - //residual between input and denoised L channel - array2D Ldetail(width,height,ARRAY2D_CLEAR_DATA); - //input L channel array2D Lin(width,height); //wavelet denoised image LabImage * labdn = new LabImage(width,height); - - //fill tile from image; convert RGB to "luma/chroma" + + //fill tile from image; convert RGB to "luma/chroma" const float maxNoiseVarab = max(noisevarab_b,noisevarab_r); if (isRAW) {//image is raw; use channel differences for chroma channels - + if(!denoiseMethodRgb){//lab mode - //modification Jacques feb 2013 and july 2014 -#ifdef _OPENMP -#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif - for (int i=tiletop; ir(i,j); - float G_ = gain*src->g(i,j); - float B_ = gain*src->b(i,j); - - R_ = (*denoiseigamtab)[R_]; - G_ = (*denoiseigamtab)[G_]; - B_ = (*denoiseigamtab)[B_]; - - //apply gamma noise standard (slider) - R_ = R_<65535.0f ? gamcurve[R_] : (Color::gammanf(R_/65535.f, gam)*32768.0f); - G_ = G_<65535.0f ? gamcurve[G_] : (Color::gammanf(G_/65535.f, gam)*32768.0f); - B_ = B_<65535.0f ? gamcurve[B_] : (Color::gammanf(B_/65535.f, gam)*32768.0f); - - //true conversion xyz=>Lab - float X,Y,Z; - Color::rgbxyz(R_,G_,B_,X,Y,Z,wp); + //modification Jacques feb 2013 and july 2014 +#ifdef _OPENMP +#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif + for (int i=tiletop; ir(i,j); + float G_ = gain*src->g(i,j); + float B_ = gain*src->b(i,j); - //convert to Lab - float L,a,b; - Color::XYZ2Lab(X, Y, Z, L, a, b); + R_ = (*denoiseigamtab)[R_]; + G_ = (*denoiseigamtab)[G_]; + B_ = (*denoiseigamtab)[B_]; - labdn->L[i1][j1] = L; - labdn->a[i1][j1] = a; - labdn->b[i1][j1] = b; - - Lin[i1][j1] = L; - - if(((i1|j1)&1) == 0) { - noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; - noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; + //apply gamma noise standard (slider) + R_ = R_<65535.0f ? gamcurve[R_] : (Color::gammanf(R_/65535.f, gam)*32768.0f); + G_ = G_<65535.0f ? gamcurve[G_] : (Color::gammanf(G_/65535.f, gam)*32768.0f); + B_ = B_<65535.0f ? gamcurve[B_] : (Color::gammanf(B_/65535.f, gam)*32768.0f); + + //true conversion xyz=>Lab + float X,Y,Z; + Color::rgbxyz(R_,G_,B_,X,Y,Z,wp); + + //convert to Lab + float L,a,b; + Color::XYZ2Lab(X, Y, Z, L, a, b); + + labdn->L[i1][j1] = L; + labdn->a[i1][j1] = a; + labdn->b[i1][j1] = b; + + Lin[i1][j1] = L; + + if(((i1|j1)&1) == 0) { + if(numTries == 1) { + noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; + } else { + noisevarlum[(i1>>1)*width2+(j1>>1)] = lumcalc[i>>1][j>>1]; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = ccalc[i>>1][j>>1]; + } + } + //end chroma } - //end chroma - } - } - } - else {//RGB mode -#ifdef _OPENMP -#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif - for (int i=tiletop; i1) +#endif + for (int i=tiletop; ir(i,j); - float Y = gain*src->g(i,j); - float Z = gain*src->b(i,j); - //conversion colorspace to determine luminance with no gamma - X = X<65535.0f ? gamcurve[X] : (Color::gamma((double)X/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); - Y = Y<65535.0f ? gamcurve[Y] : (Color::gamma((double)Y/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); - Z = Z<65535.0f ? gamcurve[Z] : (Color::gamma((double)Z/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); - //end chroma - labdn->L[i1][j1] = Y; - labdn->a[i1][j1] = (X-Y); - labdn->b[i1][j1] = (Y-Z); + float X = gain*src->r(i,j); + float Y = gain*src->g(i,j); + float Z = gain*src->b(i,j); + //conversion colorspace to determine luminance with no gamma + X = X<65535.0f ? gamcurve[X] : (Color::gamma((double)X/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); + Y = Y<65535.0f ? gamcurve[Y] : (Color::gamma((double)Y/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); + Z = Z<65535.0f ? gamcurve[Z] : (Color::gamma((double)Z/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); + //end chroma + labdn->L[i1][j1] = Y; + labdn->a[i1][j1] = (X-Y); + labdn->b[i1][j1] = (Y-Z); - Lin[i1][j1] = Y; - - if(((i1|j1)&1) == 0) { - noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; - noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; + Lin[i1][j1] = Y; + + if(((i1|j1)&1) == 0) { + if(numTries == 1) { + noisevarlum[(i1>>1)*width2+(j1>>1)] = useNoiseLCurve ? lumcalc[i>>1][j>>1] : noisevarL; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = useNoiseCCurve ? maxNoiseVarab*ccalc[i>>1][j>>1] : 1.f; + } else { + noisevarlum[(i1>>1)*width2+(j1>>1)] = lumcalc[i>>1][j>>1]; + noisevarchrom[(i1>>1)*width2+(j1>>1)] = ccalc[i>>1][j>>1]; + } + } } } } - - } - } else {//image is not raw; use Lab parametrization -#ifdef _OPENMP -#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif +#ifdef _OPENMP +#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif for (int i=tiletop; ileveldnti ==1) { labdn->L[i1][j1] = L; labdn->a[i1][j1] = a; labdn->b[i1][j1] = b; - + Lin[i1][j1] = L; - + if(((i1|j1)&1) == 0) { - float Llum,alum,blum; + float Llum,alum,blum; if(useNoiseLCurve || useNoiseCCurve) { float XL,YL,ZL; Color::rgbxyz(rLum,gLum,bLum,XL,YL,ZL,wp); Color::XYZ2Lab(XL, YL, ZL, Llum, alum, blum); } - + if(useNoiseLCurve) { float kN = Llum; float epsi=0.01f; @@ -709,25 +862,25 @@ if(settings->leveldnti ==1) { float ki=kinterm*100.f; ki+=noiseluma; noisevarlum[(i1>>1)*width2+(j1>>1)]=SQR((ki/125.f)*(1.f+ki/25.f)); - } else { - noisevarlum[(i1>>1)*width2+(j1>>1)] = noisevarL; + } else { + noisevarlum[(i1>>1)*width2+(j1>>1)] = noisevarL; } if(useNoiseCCurve) { float aN=alum; float bN=blum; float cN=sqrtf(SQR(aN)+SQR(bN)); - if(cN < 100.f) + if(cN < 100.f) cN=100.f;//avoid divided by zero ??? float Cinterm=1.f + ponderCC*4.f*noiseCCurve[cN/60.f]; noisevarchrom[(i1>>1)*width2+(j1>>1)]= maxNoiseVarab*SQR(Cinterm); - } else { - noisevarchrom[(i1>>1)*width2+(j1>>1)] = 1.f; - } + } else { + noisevarchrom[(i1>>1)*width2+(j1>>1)] = 1.f; + } } } } } - + int datalen = labdn->W * labdn->H; //now perform basic wavelet denoise @@ -738,23 +891,23 @@ if(settings->leveldnti ==1) { float interm_medT = (float) dnparams.chroma/10.0; bool execwavelet = true; bool autoch = false; - if(noisevarL < 0.000007f && interm_medT < 0.05f && dnparams.median && (dnparams.methodmed=="Lab" || dnparams.methodmed=="Lonly")) + if(noisevarL < 0.000007f && interm_medT < 0.05f && dnparams.median && (dnparams.methodmed=="Lab" || dnparams.methodmed=="Lonly")) execwavelet=false;//do not exec wavelet if sliders luminance and chroma are very small and median need - //we considered user don't want wavelet - if(settings->leveldnautsimpl==1 && dnparams.Cmethod!="MAN") + //we considered user don't want wavelet + if(settings->leveldnautsimpl==1 && dnparams.Cmethod!="MAN") execwavelet=true; - if(settings->leveldnautsimpl==0 && dnparams.C2method!="MANU") + if(settings->leveldnautsimpl==0 && dnparams.C2method!="MANU") execwavelet=true; - if(settings->leveldnautsimpl==1 && (dnparams.Cmethod=="AUT" || dnparams.Cmethod=="PRE")) + if(settings->leveldnautsimpl==1 && (dnparams.Cmethod=="AUT" || dnparams.Cmethod=="PRE")) autoch=true; - if(settings->leveldnautsimpl==0 && (dnparams.C2method=="AUTO" || dnparams.C2method=="PREV")) + if(settings->leveldnautsimpl==0 && (dnparams.C2method=="AUTO" || dnparams.C2method=="PREV")) autoch=true; + if(execwavelet) {//gain time if user choose only median sliders L <=1 slider chrom master < 1 wavelet_decomposition* Ldecomp; wavelet_decomposition* adecomp; - wavelet_decomposition* bdecomp; - + int levwav=5; float maxreal = max(realred, realblue); //increase the level of wavelet if user increase much or very much sliders @@ -762,410 +915,181 @@ if(settings->leveldnti ==1) { else if( maxreal < 10.f)levwav=6; else if( maxreal < 15.f)levwav=7; else levwav=8;//maximum ==> I have increase Maxlevel in cplx_wavelet_dec.h from 8 to 9 - if(nrQuality == QUALITY_HIGH) + if(nrQuality == QUALITY_HIGH) levwav += settings->nrwavlevel;//increase level for enhanced mode if(levwav>8) levwav=8; // if (settings->verbose) printf("levwavelet=%i noisevarA=%f noisevarB=%f \n",levwav, noisevarab_r, noisevarab_b ); -#ifdef _OPENMP -#pragma omp parallel sections num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif -{ -#ifdef _OPENMP -#pragma omp section -#endif -{ - Ldecomp = new wavelet_decomposition (labdn->data, labdn->W, labdn->H, levwav/*maxlevels*/, 1/*subsampling*/ ); -} -#ifdef _OPENMP -#pragma omp section -#endif -{ - adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H,levwav, 1 ); -} -#ifdef _OPENMP -#pragma omp section -#endif -{ - bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1 ); -} -} - if(nrQuality==QUALITY_STANDARD) - WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb );//enhance mode - else /*if(nrQuality==QUALITY_HIGH)*/ { - WaveletDenoiseAll_BiShrink(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb );//enhance mode - WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarlum, noisevarchrom, width, height, noisevarab_r, noisevarab_b,labdn, useNoiseLCurve, useNoiseCCurve, chaut ,redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb ); + Ldecomp = new wavelet_decomposition (labdn->data, labdn->W, labdn->H, levwav/*maxlevels*/, 1/*subsampling*/, 1, max(1,denoiseNestedLevels)); + if(Ldecomp->memoryAllocationFailed) { + memoryAllocationFailed = true; + } + float madL[8][3]; + if(!memoryAllocationFailed) { + int maxlvl = Ldecomp->maxlevel(); +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif + for (int lvl=0; lvllevel_W(lvl); + int Hlvl_L = Ldecomp->level_H(lvl); + + float ** WavCoeffs_L = Ldecomp->level_coeffs(lvl); + + if(!denoiseMethodRgb) { + madL[lvl][dir-1] = SQR(Mad(WavCoeffs_L[dir], Wlvl_L*Hlvl_L)); + } else { + madL[lvl][dir-1] = SQR(MadRgb(WavCoeffs_L[dir], Wlvl_L*Hlvl_L)); + } + } + } + } + + float chresid = 0.f; + float chresidtemp=0.f; + float chmaxresid = 0.f; + float chmaxresidtemp = 0.f; + + adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H,levwav, 1, 1, max(1,denoiseNestedLevels)); + if(adecomp->memoryAllocationFailed) { + memoryAllocationFailed = true; + } + if(!memoryAllocationFailed) { + if(nrQuality==QUALITY_STANDARD) { + if(!WaveletDenoiseAllAB(*Ldecomp, *adecomp, noisevarchrom, madL, width, height, noisevarab_r, useNoiseCCurve, autoch, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + } else /*if(nrQuality==QUALITY_HIGH)*/ { + if(!WaveletDenoiseAll_BiShrinkAB(*Ldecomp, *adecomp, noisevarchrom, madL, width, height, noisevarab_r, useNoiseCCurve, autoch, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + if(!memoryAllocationFailed) + if(!WaveletDenoiseAllAB(*Ldecomp, *adecomp, noisevarchrom, madL, width, height, noisevarab_r, useNoiseCCurve, autoch, denoiseMethodRgb )) + memoryAllocationFailed = true; + } + } + if(!memoryAllocationFailed) { + if(kall == 0) { + Noise_residualAB(*adecomp, chresid, chmaxresid, denoiseMethodRgb); + chresidtemp=chresid; + chmaxresidtemp= chmaxresid; + } + adecomp->reconstruct(labdn->data+datalen); } - //kall=0 call by Dcrop - if(kall==0) { - float chresid,chmaxredresid,chmaxblueresid,chresidred, chresidblue; - resid=0.f;residred=0.f;residblue=0.f; - Noise_residual(*Ldecomp, *adecomp, *bdecomp, width, height, chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue, resid, residblue, residred, maxredresid, maxblueresid, nbresid, denoiseMethodRgb); - //printf("NoiRESID=%3.1f maxR=%3.1f maxB=%3.1f red=%3.1f blue=%3.1f\n",chresid, chmaxredresid,chmaxblueresid, chresidred, chresidblue); - nresi=chresid; - highresi=chresid + 0.66f*(max(chmaxredresid,chmaxblueresid) - chresid);//evaluate sigma - } -#ifdef _OPENMP -#pragma omp parallel sections num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif -{ -#ifdef _OPENMP -#pragma omp section -#endif -{ - Ldecomp->reconstruct(labdn->data); - delete Ldecomp; -} -#ifdef _OPENMP -#pragma omp section -#endif -{ - adecomp->reconstruct(labdn->data+datalen); delete adecomp; -} -#ifdef _OPENMP -#pragma omp section -#endif -{ - bdecomp->reconstruct(labdn->data+2*datalen); - delete bdecomp; -} -} + if(!memoryAllocationFailed) { + wavelet_decomposition* bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1, 1, max(1,denoiseNestedLevels)); + if(bdecomp->memoryAllocationFailed) { + memoryAllocationFailed = true; + } + if(!memoryAllocationFailed) { + if(nrQuality==QUALITY_STANDARD) { + if(!WaveletDenoiseAllAB(*Ldecomp, *bdecomp, noisevarchrom, madL, width, height, noisevarab_b, useNoiseCCurve, autoch, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + } else /*if(nrQuality==QUALITY_HIGH)*/ { + if(!WaveletDenoiseAll_BiShrinkAB(*Ldecomp, *bdecomp, noisevarchrom, madL, width, height, noisevarab_b, useNoiseCCurve, autoch, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + if(!memoryAllocationFailed) + if(!WaveletDenoiseAllAB(*Ldecomp, *bdecomp, noisevarchrom, madL, width, height, noisevarab_b, useNoiseCCurve, autoch, denoiseMethodRgb )) + memoryAllocationFailed = true; + } + } + if(!memoryAllocationFailed) { + if(kall == 0) { + Noise_residualAB(*bdecomp, chresid, chmaxresid, denoiseMethodRgb); + chresid += chresidtemp; + chmaxresid += chmaxresidtemp; + chresid = sqrt(chresid/(6*(levwav))); + highresi = chresid + 0.66f*(sqrt(chmaxresid) - chresid);//evaluate sigma + nresi = chresid; + } + bdecomp->reconstruct(labdn->data+2*datalen); + } + delete bdecomp; + if(!memoryAllocationFailed) { + if(nrQuality==QUALITY_STANDARD) { + if(!WaveletDenoiseAllL(*Ldecomp, noisevarL, noisevarlum, madL, width, height, useNoiseLCurve, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + } else /*if(nrQuality==QUALITY_HIGH)*/ { + if(!WaveletDenoiseAll_BiShrinkL(*Ldecomp, noisevarL, noisevarlum, madL, width, height, useNoiseLCurve, denoiseMethodRgb ))//enhance mode + memoryAllocationFailed = true; + if(!memoryAllocationFailed) + if(!WaveletDenoiseAllL(*Ldecomp, noisevarL, noisevarlum, madL, width, height, useNoiseLCurve, denoiseMethodRgb )) + memoryAllocationFailed = true; + } + if(!memoryAllocationFailed) + Ldecomp->reconstruct(labdn->data); + } + } + delete Ldecomp; } - - - int metchoice=0; - if(dnparams.methodmed=="Lonly") metchoice=1; - else if(dnparams.methodmed=="Lab") metchoice=2; - else if(dnparams.methodmed=="ab") metchoice=3; - else if(dnparams.methodmed=="Lpab") metchoice=4; - + if(!memoryAllocationFailed) { //median on Luminance Lab only - if( (metchoice==1 || metchoice==2 || metchoice==3 || metchoice==4) && dnparams.median) { - //printf("Lab et Lonly \n"); - float** tmL; - int wid=labdn->W; - int hei=labdn->H; - tmL = new float*[hei]; - for (int i=0; iW; + int hei=labdn->H; + tmL = new float*[hei]; + for (int i=0; iL, labdn->L, wid, hei, medianTypeL, dnparams.passes, denoiseNestedLevels, tmL); + if(metchoice==2 || metchoice==3 || metchoice==4) { + Median_Denoise( labdn->a, labdn->a, wid, hei, medianTypeAB, dnparams.passes, denoiseNestedLevels, tmL); + Median_Denoise( labdn->b, labdn->b, wid, hei, medianTypeAB, dnparams.passes, denoiseNestedLevels, tmL); } - // methmedL=methmedAB=1; - else if(dnparams.medmethod=="55") { - {if(metchoice!=4) methmedL=methmedAB=3; - else {methmedL=1;methmedAB=3;} - } - - // methmedL =methmedAB= 3; - borderL = 2; + for (int i=0; i1) -#endif - for (int i=1; iL[i][j] ,labdn->L[i-1][j], labdn->L[i+1][j] ,labdn->L[i][j+1],labdn->L[i][j-1], tmL[i][j]);//3x3 soft - } - else - for (int j=1; j impluse denoise !I let here to get trace - /* pp[0] = labdn->L[i][j-1] ; - pp[1] = labdn->L[i-1][j] ; - pp[2] = labdn->L[i][j] ; - pp[3] = labdn->L[i+1][j] ; - pp[4] = labdn->L[i][j+1] ; - // Get median - results[0] = media(pp, 5); - // Pick up x-window elements - pp[0] = labdn->L[i-1][j-1] ; - pp[1] = labdn->L[i+1][j-1] ; - pp[2] = labdn->L[i][j] ; - pp[3] = labdn->L[i-1][j+1] ; - pp[4] = labdn->L[i+1][j+1] ; - // Get median - results[1] = media(pp, 5); - // Pick up leading element - results[2] = labdn->L[i][j] ;; - // Get result - tmL[i][j] = media(results, 3); - */ - med3(labdn->L[i][j] ,labdn->L[i-1][j], labdn->L[i+1][j] ,labdn->L[i][j+1],labdn->L[i][j-1], labdn->L[i-1][j-1],labdn->L[i-1][j+1],labdn->L[i+1][j-1],labdn->L[i+1][j+1],tmL[i][j]);//3x3 soft - } - } - } - else { -#ifdef _OPENMP -#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif - for (int i=borderL; iL[i+ii][j+jj]; - } - } - fq_sort2(pp,49); - tmL[i][j]=pp[24];//7x7 - } - else if(methmedL == 5) - for (int j=borderL; jL[i+ii][j+jj]; - } - } - fq_sort2(pp,81); - tmL[i][j]=pp[40];//9x9 - } - - else if(methmedL == 3) - for (int j=2; jL[i][j],labdn->L[i-1][j],labdn->L[i+1][j],labdn->L[i][j+1],labdn->L[i][j-1],labdn->L[i-1][j-1],labdn->L[i-1][j+1], labdn->L[i+1][j-1],labdn->L[i+1][j+1], - labdn->L[i-2][j],labdn->L[i+2][j],labdn->L[i][j+2],labdn->L[i][j-2],labdn->L[i-2][j-2],labdn->L[i-2][j+2],labdn->L[i+2][j-2],labdn->L[i+2][j+2], - labdn->L[i-2][j+1],labdn->L[i+2][j+1],labdn->L[i-1][j+2],labdn->L[i-1][j-2],labdn->L[i-2][j-1],labdn->L[i+2][j-1],labdn->L[i+1][j+2],labdn->L[i+1][j-2], - tmL[i][j]);//5x5 - } - else - for (int j=2; jL[i][j];pp[1]=labdn->L[i-1][j]; pp[2]=labdn->L[i+1][j];pp[3]=labdn->L[i][j+1];pp[4]=labdn->L[i][j-1];pp[5]=labdn->L[i-1][j-1];pp[6]=labdn->L[i-1][j+1]; - pp[7]=labdn->L[i+1][j-1];pp[8]=labdn->L[i+1][j+1];pp[9]=labdn->L[i+2][j];pp[10]=labdn->L[i-2][j];pp[11]=labdn->L[i][j+2];pp[12]=labdn->L[i][j-2]; - fq_sort2(pp,13); - tmL[i][j]=pp[6];//5x5 soft - } - } - } - - - - for(int i = borderL; i < hei-borderL; i++ ) { - for(int j = borderL; j < wid-borderL; j++) { - labdn->L[i][j] = tmL[i][j]; - } - } - } - if(metchoice==2 || metchoice==3 || metchoice==4) {/*printf(" AB methmedL=%d\n", methmedL);*/ - if(methmedAB < 2) { - for (int i=1; ia[i][j] ,labdn->a[i-1][j], labdn->a[i+1][j] ,labdn->a[i][j+1],labdn->a[i][j-1], tmL[i][j]);//3x3 soft - } - else - for (int j=1; ja[i][j] ,labdn->a[i-1][j], labdn->a[i+1][j] ,labdn->a[i][j+1],labdn->a[i][j-1], labdn->a[i-1][j-1],labdn->a[i-1][j+1],labdn->a[i+1][j-1],labdn->a[i+1][j+1],tmL[i][j]);//3x3 soft - } - } - } - else { - for (int i=borderL; ia[i+ii][j+jj]; - } - } - fq_sort2(pp,49); - tmL[i][j]=pp[24];//7x7 - } - else if(methmedAB == 5) - for (int j=borderL; ja[i+ii][j+jj]; - } - } - fq_sort2(pp,81); - tmL[i][j]=pp[40];//9 - } - - else if(methmedAB == 3) - for (int j=2; ja[i][j],labdn->a[i-1][j],labdn->a[i+1][j],labdn->a[i][j+1],labdn->a[i][j-1],labdn->a[i-1][j-1],labdn->a[i-1][j+1], labdn->a[i+1][j-1],labdn->a[i+1][j+1], - labdn->a[i-2][j],labdn->a[i+2][j],labdn->a[i][j+2],labdn->a[i][j-2],labdn->a[i-2][j-2],labdn->a[i-2][j+2],labdn->a[i+2][j-2],labdn->a[i+2][j+2], - labdn->a[i-2][j+1],labdn->a[i+2][j+1],labdn->a[i-1][j+2],labdn->a[i-1][j-2],labdn->a[i-2][j-1],labdn->a[i+2][j-1],labdn->a[i+1][j+2],labdn->a[i+1][j-2], - tmL[i][j]);//5x5 - } - else - for (int j=2; ja[i][j];pp[1]=labdn->a[i-1][j]; pp[2]=labdn->a[i+1][j];pp[3]=labdn->a[i][j+1];pp[4]=labdn->a[i][j-1];pp[5]=labdn->a[i-1][j-1];pp[6]=labdn->a[i-1][j+1]; - pp[7]=labdn->a[i+1][j-1];pp[8]=labdn->a[i+1][j+1];pp[9]=labdn->a[i+2][j];pp[10]=labdn->a[i-2][j];pp[11]=labdn->a[i][j+2];pp[12]=labdn->a[i][j-2]; - fq_sort2(pp,13); - tmL[i][j]=pp[6];//5x5 soft - } - /* for (int j=3; ja[i+ii][j+jj]; - } - pp[kk+1]=labdn->a[i-3][j-3];pp[kk+2]=labdn->a[i-3][j+3];pp[kk+3]=labdn->a[i+3][j-3];pp[kk+4]=labdn->a[i+3][j+3]; - pp[kk+5]=labdn->a[i-3][j];pp[kk+6]=labdn->a[i+3][j];pp[kk+7]=labdn->a[i][j-3];pp[kk+8]=labdn->a[i][j+3]; - - } - fq_sort2(pp,33); - tmL[i][j]=pp[16];//7x7 - } - */ - } - } - - - for(int i = borderL; i < hei-borderL; i++ ) { - for(int j = borderL; j < wid-borderL; j++) { - labdn->a[i][j] = tmL[i][j]; - } - } - - -//b - if(methmedAB < 2) { - for (int i=1; ib[i][j] ,labdn->b[i-1][j], labdn->b[i+1][j] ,labdn->b[i][j+1],labdn->b[i][j-1], tmL[i][j]);//3x3 soft - } - else - for (int j=1; jb[i][j] ,labdn->b[i-1][j], labdn->b[i+1][j] ,labdn->b[i][j+1],labdn->b[i][j-1], labdn->b[i-1][j-1],labdn->b[i-1][j+1],labdn->b[i+1][j-1],labdn->b[i+1][j+1],tmL[i][j]);//3x3 soft - } - } - } - else { - for (int i=borderL; ib[i+ii][j+jj]; - } - } - fq_sort2(pp,49); - tmL[i][j]=pp[24];//7x7 - - - } - else if(methmedAB == 5) - for (int j=borderL; jb[i+ii][j+jj]; - } - } - fq_sort2(pp,81); - tmL[i][j]=pp[40];//9 - } - - else if(methmedAB == 3) - for (int j=2; jb[i][j],labdn->b[i-1][j],labdn->b[i+1][j],labdn->b[i][j+1],labdn->b[i][j-1],labdn->b[i-1][j-1],labdn->b[i-1][j+1], labdn->b[i+1][j-1],labdn->b[i+1][j+1], - labdn->b[i-2][j],labdn->b[i+2][j],labdn->b[i][j+2],labdn->b[i][j-2],labdn->b[i-2][j-2],labdn->b[i-2][j+2],labdn->b[i+2][j-2],labdn->b[i+2][j+2], - labdn->b[i-2][j+1],labdn->b[i+2][j+1],labdn->b[i-1][j+2],labdn->b[i-1][j-2],labdn->b[i-2][j-1],labdn->b[i+2][j-1],labdn->b[i+1][j+2],labdn->b[i+1][j-2], - tmL[i][j]);//5x5 - - } - else - for (int j=2; jb[i+ii][j+jj]; - } - pp[kk+1]=labdn->b[i-3][j-3];pp[kk+2]=labdn->b[i-3][j+3];pp[kk+3]=labdn->b[i+3][j-3];pp[kk+4]=labdn->b[i+3][j+3]; - pp[kk+5]=labdn->b[i-3][j];pp[kk+6]=labdn->b[i+3][j];pp[kk+7]=labdn->b[i][j-3];pp[kk+8]=labdn->b[i][j+3]; - - } - fq_sort2(pp,33); - tmL[i][j]=pp[16];//7x7 - */ - - - pp[0]=labdn->b[i][j];pp[1]=labdn->b[i-1][j]; pp[2]=labdn->b[i+1][j];pp[3]=labdn->b[i][j+1];pp[4]=labdn->b[i][j-1];pp[5]=labdn->b[i-1][j-1];pp[6]=labdn->b[i-1][j+1]; - pp[7]=labdn->b[i+1][j-1];pp[8]=labdn->b[i+1][j+1];pp[9]=labdn->b[i+2][j];pp[10]=labdn->b[i-2][j];pp[11]=labdn->b[i][j+2];pp[12]=labdn->b[i][j-2]; - fq_sort2(pp,13); - tmL[i][j]=pp[6];//5x5 soft - - } - } - } - - for(int i = borderL; i < hei-borderL; i++ ) { - for(int j = borderL; j < wid-borderL; j++) { - labdn->b[i][j] = tmL[i][j]; - } - } - } - - } - for (int i=0; ileveldnti ==1) { // patterns missed by wavelet denoise // blocks are not the same thing as tiles! - - // calculation for detail recovery blocks const int numblox_W = ceil(((float)(width))/(offset))+2*blkrad; const int numblox_H = ceil(((float)(height))/(offset))+2*blkrad; + //residual between input and denoised L channel + array2D Ldetail(width,height,ARRAY2D_CLEAR_DATA); + + //pixel weight + array2D totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks + // end of tiling calc - { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - // Main detail recovery algorithm: Block loop + // Main detail recovery algorithm: Block loop //DCT block data storage - -#ifdef _OPENMP - int masterThread = omp_get_thread_num(); -#endif -#ifdef _OPENMP -#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif -{ -#ifdef _OPENMP - int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num(); -#else - int subThread = 0; -#endif - float blurbuffer[TS*TS] ALIGNED64; - float * Lblox = LbloxArray[subThread]; - float * fLblox = fLbloxArray[subThread]; - float pBuf[width + TS + 2*blkrad*offset] ALIGNED16; - float nbrwt[TS*TS] ALIGNED64; -#ifdef _OPENMP -#pragma omp for -#endif - for (int vblk=0; vblk1) +#endif +{ +#ifdef _OPENMP + int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num(); +#else + int subThread = 0; +#endif + float blurbuffer[TS*TS] ALIGNED64; + float *Lblox = LbloxArray[subThread]; + float *fLblox = fLbloxArray[subThread]; + float pBuf[width + TS + 2*blkrad*offset] ALIGNED16; + float nbrwt[TS*TS] ALIGNED64; +#ifdef _OPENMP +#pragma omp for +#endif + for (int vblk=0; vblkleveldnti ==1) { //now fill this row of the blocks with Lab high pass data for (int hblk=0; hblk=0 && top+i=0 && top+ileveldnti ==1) { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% }//end of vertical block loop - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -} - } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +} + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +#ifdef _OPENMP +#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif for (int i=0; iL[i][j] += hpdn; - + labdn->L[i][j] += Ldetail[i][j]/totwt[i][j]; //note that labdn initially stores the denoised hipass data } - } + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // transform denoised "Lab" to output RGB @@ -1310,32 +1243,37 @@ if(settings->leveldnti ==1) { //calculate mask for feathering output tile overlaps float Vmask[height+1] ALIGNED16; float Hmask[width+1] ALIGNED16; - - for (int i=0; i0) Vmask[i] = mask; - if (tilebottom0) Hmask[i] = mask/newGain; - if (tileright 1) { + for (int i=0; i0) Vmask[i] = mask; + if (tilebottom0) Hmask[i] = mask/newGain; + if (tileright1) -#endif + +#ifdef _OPENMP +#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif for (int i=tiletop; ileveldnti ==1) { float a = labdn->a[i1][j1]; float b = labdn->b[i1][j1]; float c_h=SQR(a)+SQR(b); - if(c_h>9000000.f){ + if(c_h>9000000.f){ a *= 1.f + qhighFactor*realred; - b *= 1.f + qhighFactor*realblue; + b *= 1.f + qhighFactor*realblue; } //convert XYZ float X,Y,Z; @@ -1360,22 +1298,28 @@ if(settings->leveldnti ==1) { r_ = r_<32768.f ? igamcurve[r_] : (Color::gammanf(r_/32768.f, igam) * 65535.f); g_ = g_<32768.f ? igamcurve[g_] : (Color::gammanf(g_/32768.f, igam) * 65535.f); b_ = b_<32768.f ? igamcurve[b_] : (Color::gammanf(b_/32768.f, igam) * 65535.f); - - //readapt arbitrary gamma (inverse from beginning) - r_ = (*denoisegamtab)[r_]; - g_ = (*denoisegamtab)[g_]; + + //readapt arbitrary gamma (inverse from beginning) + r_ = (*denoisegamtab)[r_]; + g_ = (*denoisegamtab)[g_]; b_ = (*denoisegamtab)[b_]; - float factor = Vmask[i1]*Hmask[j1]; - - dsttmp->r(i,j) += factor*r_; - dsttmp->g(i,j) += factor*g_; - dsttmp->b(i,j) += factor*b_; - + if(numtiles == 1) { + dsttmp->r(i,j) = newGain*r_; + dsttmp->g(i,j) = newGain*g_; + dsttmp->b(i,j) = newGain*b_; + } else { + float factor = Vmask[i1]*Hmask[j1]; + dsttmp->r(i,j) += factor*r_; + dsttmp->g(i,j) += factor*g_; + dsttmp->b(i,j) += factor*b_; + } } } - } - else {//RGB mode + } else {//RGB mode +#ifdef _OPENMP +#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif for (int i=tiletop; ileveldnti ==1) { Y = Y<32768.0f ? igamcurve[Y] : (Color::gamma((float)Y/32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); Z = Z<32768.0f ? igamcurve[Z] : (Color::gamma((float)Z/32768.0f, igam, igamthresh, igamslope, 1.0, 0.0) * 65535.0f); - float factor = Vmask[i1]*Hmask[j1]; - - dsttmp->r(i,j) += factor*X; - dsttmp->g(i,j) += factor*Y; - dsttmp->b(i,j) += factor*Z; - + if(numtiles == 1) { + dsttmp->r(i,j) = newGain*X; + dsttmp->g(i,j) = newGain*Y; + dsttmp->b(i,j) = newGain*Z; + } else { + float factor = Vmask[i1]*Hmask[j1]; + dsttmp->r(i,j) += factor*X; + dsttmp->g(i,j) += factor*Y; + dsttmp->b(i,j) += factor*Z; + } } } } } else { +#ifdef _OPENMP +#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif for (int i=tiletop; ileveldnti ==1) { float X,Y,Z; Color::Lab2XYZ(L, a, b, X, Y, Z); - float factor = Vmask[i1]*Hmask[j1]; float r_,g_,b_; Color::xyz2rgb(X,Y,Z,r_,g_,b_,wip); //gamma slider is different from Raw @@ -1430,36 +1380,50 @@ if(settings->leveldnti ==1) { g_ = g_<32768.0f ? igamcurve[g_] : (Color::gamman((float)g_/32768.0f, igam) * 65535.0f); b_ = b_<32768.0f ? igamcurve[b_] : (Color::gamman((float)b_/32768.0f, igam) * 65535.0f); - dsttmp->r(i,j) += factor*r_; - dsttmp->g(i,j) += factor*g_; - dsttmp->b(i,j) += factor*b_; - + if(numtiles == 1) { + dsttmp->r(i,j) = newGain*r_; + dsttmp->g(i,j) = newGain*g_; + dsttmp->b(i,j) = newGain*b_; + } else { + float factor = Vmask[i1]*Hmask[j1]; + dsttmp->r(i,j) += factor*r_; + dsttmp->g(i,j) += factor*g_; + dsttmp->b(i,j) += factor*b_; + } } } } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - + } delete labdn; }//end of tile row }//end of tile loop - delete [] noisevarlum; - delete [] noisevarchrom; - - } - - for(int i=0;i 1 || !isRAW) { + delete [] noisevarlum; + delete [] noisevarchrom; + } + + } + + for(int i=0;idata, dsttmp->data, 3*dst->width*dst->height*sizeof(float)); - if (!isRAW) {//restore original image gamma + if(numtiles>1) { + if(!memoryAllocationFailed) + memcpy (dst->data, dsttmp->data, 3*dst->width*dst->height*sizeof(float)); + else if(dst != src) + memcpy (dst->data, src->data, 3*dst->width*dst->height*sizeof(float)); + delete dsttmp; + } + if (!isRAW && !memoryAllocationFailed) {//restore original image gamma #ifdef _OPENMP #pragma omp parallel for #endif @@ -1468,13 +1432,16 @@ omp_set_nested(oldNested); } } - delete dsttmp; // destroy the plans fftwf_destroy_plan( plan_forward_blox[0] ); fftwf_destroy_plan( plan_backward_blox[0] ); fftwf_destroy_plan( plan_forward_blox[1] ); fftwf_destroy_plan( plan_backward_blox[1] ); -fftwf_cleanup(); +fftwf_cleanup(); +} while(memoryAllocationFailed && numTries < 2 && (options.rgbDenoiseThreadLimit == 0) && !ponder); +if(memoryAllocationFailed) + printf("tiled denoise failed due to isufficient memory. Output is not denoised!\n"); + } @@ -1529,14 +1496,14 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ #pragma omp for for (int i=2; ir(i,j),source->r(i-1,j),source->r(i+1,j),source->r(i,j+1),source->r(i,j-1),source->r(i-1,j-1),source->r(i-1,j+1),source->r(i+1,j-1),source->r(i+1,j+1), source->r(i-2,j),source->r(i+2,j),source->r(i,j+2),source->r(i,j-2),source->r(i-2,j-2),source->r(i-2,j+2),source->r(i+2,j-2),source->r(i+2,j+2), source->r(i-2,j+1),source->r(i+2,j+1),source->r(i-1,j+2),source->r(i-1,j-2),source->r(i-2,j-1),source->r(i+2,j-1),source->r(i+1,j+2),source->r(i+1,j-2), tm[i][j]);//5x5 - } + } } else for (int j=2; jr(i,j);pp[1]=source->r(i-1,j); pp[2]=source->r(i+1,j);pp[3]=source->r(i,j+1);pp[4]=source->r(i,j-1);pp[5]=source->r(i-1,j-1);pp[6]=source->r(i-1,j+1); @@ -1572,14 +1539,14 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ #pragma omp for for (int i=2; ib(i,j),source->b(i-1,j),source->b(i+1,j),source->b(i,j+1),source->b(i,j-1),source->b(i-1,j-1),source->b(i-1,j+1),source->b(i+1,j-1),source->b(i+1,j+1), source->b(i-2,j),source->b(i+2,j),source->b(i,j+2),source->b(i,j-2),source->b(i-2,j-2),source->b(i-2,j+2),source->b(i+2,j-2),source->b(i+2,j+2), source->b(i-2,j+1),source->b(i+2,j+1),source->b(i-1,j+2),source->b(i-1,j-2),source->b(i-2,j-1),source->b(i+2,j-1),source->b(i+1,j+2),source->b(i+1,j-2), tm[i][j]);//5x5 - } + } } else for (int j=2; jb(i,j);pp[1]=source->b(i-1,j); pp[2]=source->b(i+1,j);pp[3]=source->b(i,j+1);pp[4]=source->b(i,j-1);pp[5]=source->b(i-1,j-1);pp[6]=source->b(i-1,j+1); @@ -1617,14 +1584,14 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ #pragma omp for for (int i=2; ig(i,j),source->g(i-1,j),source->g(i+1,j),source->g(i,j+1),source->g(i,j-1),source->g(i-1,j-1),source->g(i-1,j+1),source->g(i+1,j-1),source->g(i+1,j+1), source->g(i-2,j),source->g(i+2,j),source->g(i,j+2),source->g(i,j-2),source->g(i-2,j-2),source->g(i-2,j+2),source->g(i+2,j-2),source->g(i+2,j+2), source->g(i-2,j+1),source->g(i+2,j+1),source->g(i-1,j+2),source->g(i-1,j-2),source->g(i-2,j-1),source->g(i+2,j-1),source->g(i+1,j+2),source->g(i+1,j-2), tm[i][j]);//5x5 - } + } } else for (int j=2; jg(i,j);pp[1]=source->g(i-1,j); pp[2]=source->g(i+1,j);pp[3]=source->g(i,j+1);pp[4]=source->g(i,j-1);pp[5]=source->g(i-1,j-1);pp[6]=source->g(i-1,j+1); @@ -1652,14 +1619,11 @@ for(int iteration=1;iteration<=dnparams.passes;iteration++){ } //end median if(noiseLCurve || useNoiseCCurve) { - for (int i=0; i<(hei); i++) - delete [] lumcalc[i]; - delete [] lumcalc; - for (int i=0; i<(hei); i++) - delete [] ccalc[i]; - delete [] ccalc; - - } + delete [] lumcalcBuffer; + delete [] lumcalc; + delete [] ccalcBuffer; + delete [] ccalc; + } //#ifdef _DEBUG if (settings->verbose) { @@ -1714,7 +1678,8 @@ SSEFUNCTION void ImProcFunctions::RGBtile_denoise (float * fLblox, int hblproc, int bottom = MIN( top+TS,height); int imax = bottom - top; - //add row of tiles to output image + //add row of tiles to output image + for (int i=imin; imaxredresid ) maxredresid=madaC; - if(madbC > maxblueresid) maxblueresid=madbC; - nbresid++; + resid += madC; + if(madC >maxresid ) maxresid=madC; } - chresid=sqrt(resid/(2*nbresid)); - chmaxredresid=sqrt(maxredresid); - chmaxblueresid=sqrt(maxblueresid); - chresidred=sqrt(residred/nbresid); - chresidblue=sqrt(residblue/nbresid); } + + chresid = resid; + chmaxresid = maxresid; } -SSEFUNCTION void ImProcFunctions::WaveletDenoiseAll_BiShrink(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool denoiseMethodRgb) +SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float noisevar_L, float *noisevarlum, float madL[8][3], int width, int height, const bool useNoiseLCurve, bool denoiseMethodRgb) { int maxlvl = WaveletCoeffs_L.maxlevel(); - const float eps = 0.01f; - if(autoch && noisevar_abr <=0.001f) noisevar_abr=0.02f; - if(autoch && noisevar_abb <=0.001f) noisevar_abb=0.02f; + const float eps = 0.01f; - float madL[8][3], mada[8][3], madb[8][3]; - - int maxWL = 0, maxHL = 0; - for (int lvl=0; lvl maxWL) - maxWL = WaveletCoeffs_L.level_W(lvl); - if(WaveletCoeffs_L.level_H(lvl) > maxHL) - maxHL = WaveletCoeffs_L.level_H(lvl); - } - -#ifdef _OPENMP -#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif -{ - float *buffer[5]; - buffer[0] = new float[maxWL*maxHL+32]; - buffer[1] = new float[maxWL*maxHL+64]; - buffer[2] = new float[maxWL*maxHL+96]; - buffer[3] = new float[maxWL*maxHL+128]; - buffer[4] = new float[maxWL*maxHL+160]; - - -#ifdef _OPENMP -#pragma omp for -#endif + int maxWL = 0, maxHL = 0; for (int lvl=0; lvl maxWL) + maxWL = WaveletCoeffs_L.level_W(lvl); + if(WaveletCoeffs_L.level_H(lvl) > maxHL) + maxHL = WaveletCoeffs_L.level_H(lvl); + } + bool memoryAllocationFailed = false; +#ifdef _OPENMP +#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif +{ + float *buffer[3]; + buffer[0] = new (std::nothrow) float[maxWL*maxHL+32]; + buffer[1] = new (std::nothrow) float[maxWL*maxHL+64]; + buffer[2] = new (std::nothrow) float[maxWL*maxHL+96]; + if(buffer[0] == NULL || buffer[1] == NULL || buffer[2] == NULL) { + memoryAllocationFailed = true; + } + + if(!memoryAllocationFailed) { + +#ifdef _OPENMP +#pragma omp for schedule(dynamic) collapse(2) +#endif + for (int lvl=maxlvl-1; lvl>=0; lvl--) {//for levels less than max, use level diff to make edge mask + for (int dir=1; dir<4; dir++) { int Wlvl_L = WaveletCoeffs_L.level_W(lvl); int Hlvl_L = WaveletCoeffs_L.level_H(lvl); - int Wlvl_ab = WaveletCoeffs_a.level_W(lvl); - int Hlvl_ab = WaveletCoeffs_a.level_H(lvl); - float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl); - float ** WavCoeffs_a = WaveletCoeffs_a.level_coeffs(lvl); - float ** WavCoeffs_b = WaveletCoeffs_b.level_coeffs(lvl); + if (lvl==maxlvl-1) { + ShrinkAllL(WaveletCoeffs_L, buffer, lvl, dir, noisevar_L, noisevarlum, width, height, useNoiseLCurve, denoiseMethodRgb, madL[lvl] ); + } else { + //simple wavelet shrinkage + float * sfave = buffer[0]+32; + float * sfaved = buffer[2]+96; + float * blurBuffer = buffer[1]+64; + + float mad_Lr = madL[lvl][dir-1]; + + if (noisevar_L>0.00001f) { + float levelFactor = mad_Lr*5.f/(lvl+1); +#ifdef __SSE2__ + __m128 mad_Lv; + __m128 ninev = _mm_set1_ps( 9.0f ); + __m128 epsv = _mm_set1_ps(eps); + __m128 mag_Lv; + __m128 levelFactorv = _mm_set1_ps(levelFactor); + int coeffloc_L; + for (coeffloc_L=0; coeffloc_L=0;i--) + if(buffer[i] != NULL) + delete [] buffer[i]; + +} + return (!memoryAllocationFailed); + } + +SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, + float *noisevarchrom, float madL[8][3], int width, int height, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb) + { + int maxlvl = WaveletCoeffs_L.maxlevel(); + if(autoch && noisevar_ab <=0.001f) noisevar_ab=0.02f; + + float madab[8][3]; + + int maxWL = 0, maxHL = 0; + for (int lvl=0; lvl maxWL) + maxWL = WaveletCoeffs_L.level_W(lvl); + if(WaveletCoeffs_L.level_H(lvl) > maxHL) + maxHL = WaveletCoeffs_L.level_H(lvl); + } + bool memoryAllocationFailed = false; +#ifdef _OPENMP +#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif +{ + float *buffer[3]; + buffer[0] = new (std::nothrow) float[maxWL*maxHL+32]; + buffer[1] = new (std::nothrow) float[maxWL*maxHL+64]; + buffer[2] = new (std::nothrow) float[maxWL*maxHL+96]; + if(buffer[0] == NULL || buffer[1] == NULL || buffer[2] == NULL) { + memoryAllocationFailed = true; + } + + if(!memoryAllocationFailed) { + + +#ifdef _OPENMP +#pragma omp for schedule(dynamic) collapse(2) +#endif + for (int lvl=0; lvl=0; lvl--) {//for levels less than max, use level diff to make edge mask - int Wlvl_L = WaveletCoeffs_L.level_W(lvl); - int Hlvl_L = WaveletCoeffs_L.level_H(lvl); - - int Wlvl_ab = WaveletCoeffs_a.level_W(lvl); - int Hlvl_ab = WaveletCoeffs_a.level_H(lvl); + for (int dir=1; dir<4; dir++) { + int Wlvl_ab = WaveletCoeffs_ab.level_W(lvl); + int Hlvl_ab = WaveletCoeffs_ab.level_H(lvl); - int skip_L = WaveletCoeffs_L.level_stride(lvl); - int skip_ab = WaveletCoeffs_a.level_stride(lvl); - - float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl); - float ** WavCoeffs_a = WaveletCoeffs_a.level_coeffs(lvl); - float ** WavCoeffs_b = WaveletCoeffs_b.level_coeffs(lvl); - if (lvl==maxlvl-1) { - ShrinkAll(WavCoeffs_L, WavCoeffs_a, WavCoeffs_b, buffer, lvl, Wlvl_L, Hlvl_L, Wlvl_ab, Hlvl_ab, - skip_L, skip_ab, noisevar_L, noisevarlum, noisevarchrom, width, height, noisevar_abr, noisevar_abb, noi, useNoiseLCurve, useNoiseCCurve, chaut, redaut, blueaut, maxredaut, maxblueaut, autoch, denoiseMethodRgb, mada[lvl], madb[lvl], madL[lvl], true); + float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl); + float ** WavCoeffs_ab = WaveletCoeffs_ab.level_coeffs(lvl); + if (lvl==maxlvl-1) { + ShrinkAllAB(WaveletCoeffs_L, WaveletCoeffs_ab, buffer, lvl, dir, noisevarchrom, width, height, noisevar_ab, useNoiseCCurve, autoch, denoiseMethodRgb, madL[lvl], madab[lvl], true); + } else { + //simple wavelet shrinkage - } else { - //simple wavelet shrinkage - float * sfave = buffer[0]+32; - float * sfaved = buffer[3]+128; - float * blurBuffer = buffer[1]+64; - - for (int dir=1; dir<4; dir++) { float mad_Lr = madL[lvl][dir-1]; - float mad_ar = useNoiseCCurve ? noisevar_abr*mada[lvl][dir-1] : SQR(noisevar_abr)*mada[lvl][dir-1]; - float mad_br = useNoiseCCurve ? noisevar_abb*madb[lvl][dir-1] : SQR(noisevar_abb)*madb[lvl][dir-1] ; - - if (noisevar_abr>0.001f || noisevar_abb>0.001f) { - + float mad_abr = useNoiseCCurve ? noisevar_ab*madab[lvl][dir-1] : SQR(noisevar_ab)*madab[lvl][dir-1]; + + if (noisevar_ab>0.001f) { + #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); - __m128 mad_arv = _mm_set1_ps(mad_ar); - __m128 mad_brv = _mm_set1_ps(mad_br); + __m128 onev = _mm_set1_ps(1.f); + __m128 mad_abrv = _mm_set1_ps(mad_abr); __m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f); - __m128 mad_av; - __m128 mad_bv; - __m128 mag_Lv, mag_av, mag_bv; - __m128 tempav, tempbv; + __m128 mad_abv; + __m128 mag_Lv, mag_abv; + __m128 tempabv; int coeffloc_ab; - for (coeffloc_ab=0; coeffloc_ab0.00001f) { - float levelFactor = mad_Lr*5.f/(lvl+1); -#ifdef __SSE2__ - __m128 mad_Lv; - __m128 ninev = _mm_set1_ps( 9.0f ); - __m128 epsv = _mm_set1_ps(eps); - __m128 mag_Lv; - __m128 levelFactorv = _mm_set1_ps(levelFactor); - int coeffloc_L; - for (coeffloc_L=0; coeffloc_L=0;i--) - delete [] buffer[i]; - + } + } + for(int i=2;i>=0;i--) + if(buffer[i] != NULL) + delete [] buffer[i]; + } + return (!memoryAllocationFailed); } - void ImProcFunctions::WaveletDenoiseAll(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut,float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool denoiseMethodRgb)//mod JD + bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float noisevar_L, float *noisevarlum, float madL[8][3], int width, int height, const bool useNoiseLCurve, bool denoiseMethodRgb)//mod JD { - - int maxlvl = WaveletCoeffs_L.maxlevel(); - int maxWL = 0, maxHL = 0; - for (int lvl=0; lvl maxWL) - maxWL = WaveletCoeffs_L.level_W(lvl); - if(WaveletCoeffs_L.level_H(lvl) > maxHL) - maxHL = WaveletCoeffs_L.level_H(lvl); - } -#ifdef _OPENMP -#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) -#endif -{ - float *buffer[5]; - buffer[0] = new float[maxWL*maxHL+32]; - buffer[1] = new float[maxWL*maxHL+64]; - buffer[2] = new float[maxWL*maxHL+96]; - buffer[3] = new float[maxWL*maxHL+128]; - buffer[4] = new float[maxWL*maxHL+160]; - - -#ifdef _OPENMP -#pragma omp for -#endif - + int maxlvl = WaveletCoeffs_L.maxlevel(); + int maxWL = 0, maxHL = 0; for (int lvl=0; lvl=0;i--) - delete [] buffer[i]; + if(WaveletCoeffs_L.level_W(lvl) > maxWL) + maxWL = WaveletCoeffs_L.level_W(lvl); + if(WaveletCoeffs_L.level_H(lvl) > maxHL) + maxHL = WaveletCoeffs_L.level_H(lvl); + } + bool memoryAllocationFailed = false; +#ifdef _OPENMP +#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif +{ + float *buffer[3]; + buffer[0] = new (std::nothrow) float[maxWL*maxHL+32]; + buffer[1] = new (std::nothrow) float[maxWL*maxHL+64]; + buffer[2] = new (std::nothrow) float[maxWL*maxHL+96]; + if(buffer[0] == NULL || buffer[1] == NULL || buffer[2] == NULL) { + memoryAllocationFailed = true; + } + + if(!memoryAllocationFailed) { +#ifdef _OPENMP +#pragma omp for schedule(dynamic) collapse(2) +#endif + for (int lvl=0; lvl=0;i--) + if(buffer[i] != NULL) + delete [] buffer[i]; } + return (!memoryAllocationFailed); } + + + bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, + float *noisevarchrom, float madL[8][3], int width, int height, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)//mod JD + { + + int maxlvl = WaveletCoeffs_L.maxlevel(); + int maxWL = 0, maxHL = 0; + for (int lvl=0; lvl maxWL) + maxWL = WaveletCoeffs_L.level_W(lvl); + if(WaveletCoeffs_L.level_H(lvl) > maxHL) + maxHL = WaveletCoeffs_L.level_H(lvl); + } + bool memoryAllocationFailed = false; +#ifdef _OPENMP +#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1) +#endif +{ + float *buffer[3]; + buffer[0] = new (std::nothrow) float[maxWL*maxHL+32]; + buffer[1] = new (std::nothrow) float[maxWL*maxHL+64]; + buffer[2] = new (std::nothrow) float[maxWL*maxHL+96]; + if(buffer[0] == NULL || buffer[1] == NULL || buffer[2] == NULL) { + memoryAllocationFailed = true; + } + + if(!memoryAllocationFailed) { +#ifdef _OPENMP +#pragma omp for schedule(dynamic) collapse(2) +#endif + for (int lvl=0; lvl=0;i--) + if(buffer[i] != NULL) + delete [] buffer[i]; +} + return (!memoryAllocationFailed); + } + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -SSEFUNCTION void ImProcFunctions::ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, float **buffer, int level, - int W_L, int H_L, int W_ab, int H_ab,int skip_L, int skip_ab, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, - float &maxredaut, float &maxblueaut, bool autoch, bool denoiseMethodRgb, float * madaa, float * madab, float * madaL, bool madCalculated ) - { + +SSEFUNCTION void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, + float noisevar_L, float *noisevarlum, int width, int height, const bool useNoiseLCurve, + bool denoiseMethodRgb, float * madL ) + + { //simple wavelet shrinkage const float eps = 0.01f; - if(autoch && noisevar_abr <=0.001f) - noisevar_abr=0.02f; - if(autoch && noisevar_abb <=0.001f) - noisevar_abb=0.02f; - float * sfavea = buffer[0]+32; - float * sfaveb = buffer[1]+64; - float * sfavead = buffer[3]+128; - float * sfavebd = buffer[4]+160; - float * sfave = sfavea; // we can safely reuse sfavea here, because they are not used together - float * sfaved = sfavead; // we can safely reuse sfavead here, because they are not used together - float * blurBuffer = buffer[2]+96; - - for (int dir=1; dir<4; dir++) { - float mada, madb, madL; - if(madCalculated) { - mada = madaa[dir-1]; - madb = madab[dir-1]; - madL = madaL[dir-1] ; - } else { - if(!denoiseMethodRgb) { - madL = SQR(Mad(WavCoeffs_L[dir], W_L*H_L)); - mada = SQR(Mad(WavCoeffs_a[dir], W_ab*H_ab)); - madb = SQR(Mad(WavCoeffs_b[dir], W_ab*H_ab)); - } else { - madL = SQR(MadRgb(WavCoeffs_L[dir], W_L*H_L)); - mada = SQR(MadRgb(WavCoeffs_a[dir], W_ab*H_ab)); - madb = SQR(MadRgb(WavCoeffs_b[dir], W_ab*H_ab)); - } - } + float * sfave = buffer[0]+32; + float * sfaved = buffer[1]+64; + float * blurBuffer = buffer[2]+96; - if (noisevar_abr>0.001f || noisevar_abb>0.001f ) { - mada = useNoiseCCurve ? mada : mada * noisevar_abr; - madb = useNoiseCCurve ? madb : madb * noisevar_abb; + int W_L = WaveletCoeffs_L.level_W(level); + int H_L = WaveletCoeffs_L.level_H(level); + + float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(level); + + float mad_L = madL[dir-1] ; + + if (noisevar_L>0.00001f) { + float levelFactor = mad_L * 5.f / (float)(level+1); #ifdef __SSE2__ - __m128 onev = _mm_set1_ps(1.f); - __m128 mad_arv = _mm_set1_ps(mada); - __m128 mad_brv = _mm_set1_ps(madb); - - __m128 rmadLm9v = onev / _mm_set1_ps(madL * 9.f); - __m128 mad_av ; - __m128 mad_bv; - __m128 mag_Lv, mag_av, mag_bv; - int coeffloc_ab; - for (coeffloc_ab=0; coeffloc_ab0.00001f) { - float levelFactor = madL * 5.f / (float)(level+1); -#ifdef __SSE2__ - __m128 magv; - __m128 levelFactorv = _mm_set1_ps(levelFactor); - __m128 mad_Lv; - __m128 ninev = _mm_set1_ps( 9.0f ); - __m128 epsv = _mm_set1_ps( eps ); - int i; - for (i=0; i0.001f) { + madab = useNoiseCCurve ? madab : madab * noisevar_ab; +#ifdef __SSE2__ + __m128 onev = _mm_set1_ps(1.f); + __m128 mad_abrv = _mm_set1_ps(madab); + + __m128 rmadLm9v = onev / _mm_set1_ps(mad_L * 9.f); + __m128 mad_abv ; + __m128 mag_Lv, mag_abv; + int coeffloc_ab; + for (coeffloc_ab=0; coeffloc_ab -0.8f && noisevarhue[i][j] < 2.0f && noisevarchrom[i][j] > 10000.f) {//saturated red yellow - red_yel += noisevarchrom[i][j]; - nry++; + red_yel += noisevarchrom[i][j]; + nry++; } if(noisevarhue[i][j] > 0.f && noisevarhue[i][j] < 1.6f && noisevarchrom[i][j] < 10000.f) {//skin - skin_c += noisevarchrom[i][j]; - nsk++; + skin_c += noisevarchrom[i][j]; + nsk++; } - lume += noisevarlum[i][j]; + lume += noisevarlum[i][j]; nL++; devL += SQR(noisevarlum[i][j]-(lume/nL)); } @@ -2368,30 +2373,30 @@ SSEFUNCTION void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** if(nsk>0) skinc=skin_c/nsk; } - + const float reduc = (schoice == 2) ? (float) settings->nrhigh : 1.f; for (int dir=1; dir<4; dir++) { - float mada, madb; + float mada, madb; if(!perf) - mada = SQR(Mad(WavCoeffs_a[dir], W_ab*H_ab)); - else + mada = SQR(Mad(WavCoeffs_a[dir], W_ab*H_ab)); + else mada = SQR(MadRgb(WavCoeffs_a[dir], W_ab*H_ab)); chred += mada; - if(mada > maxchred) + if(mada > maxchred) maxchred = mada; - if(mada < minchred) + if(mada < minchred) minchred = mada; - maxredaut = sqrt(reduc*maxchred); + maxredaut = sqrt(reduc*maxchred); minredaut = sqrt(reduc*minchred); - + if(!perf) - madb = SQR(Mad(WavCoeffs_b[dir], W_ab*H_ab)); - else + madb = SQR(Mad(WavCoeffs_b[dir], W_ab*H_ab)); + else madb = SQR(MadRgb(WavCoeffs_b[dir], W_ab*H_ab)); chblue += madb; - if(madb > maxchblue) + if(madb > maxchblue) maxchblue = madb; - if(madb < minchblue) + if(madb < minchblue) minchblue = madb; maxblueaut = sqrt(reduc*maxchblue); minblueaut = sqrt(reduc*minchblue); @@ -2403,10 +2408,10 @@ SSEFUNCTION void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** redaut = sqrt(reduc*chred/nb); blueaut = sqrt(reduc*chblue/nb); Nb = nb; - } + } } - + void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a, wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut,int schoice, bool autoch, @@ -2430,17 +2435,17 @@ void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition & } } -void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) { +void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope) { gam = dnparams.gamma; gamthresh = 0.001f; if(!isRAW) {//reduce gamma under 1 for Lab mode ==> TIF and JPG - if(gam <1.9f) + if(gam <1.9f) gam=1.f - (1.9f-gam)/3.f;//minimum gamma 0.7 - else if (gam >= 1.9f && gam <= 3.f) + else if (gam >= 1.9f && gam <= 3.f) gam=(1.4f/1.1f)*gam - 1.41818f; } gamslope = exp(log((double)gamthresh)/gam)/gamthresh; - bool perf = (dnparams.dmethod=="RGB"); + bool perf = (dnparams.dmethod=="RGB"); if(perf) { for (int i=0; i<65536; i++) { gamcurve[i] = (Color::gamma((double)i/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)) * 32768.0f; @@ -2456,13 +2461,13 @@ void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoisePa void ImProcFunctions::calcautodn_info (float &chaut, float &delta, int Nb, int levaut, float maxmax, float lumema, float chromina, int mode, int lissage, float redyel, float skinc, float nsknc) { float reducdelta=1.f; - if (params->dirpyrDenoise.smethod == "shalbi") + if (params->dirpyrDenoise.smethod == "shalbi") reducdelta = (float)settings->nrhigh; chaut = (chaut*Nb-maxmax)/(Nb-1);//suppress maximum for chaut calcul - if ((redyel > 5000.f || skinc > 1000.f) && nsknc < 0.4f && chromina > 3000.f) + if ((redyel > 5000.f || skinc > 1000.f) && nsknc < 0.4f && chromina > 3000.f) chaut *= 0.45f;//reduct action in red zone, except skin for high / med chroma - else if ((redyel>12000.f || skinc > 1200.f) && nsknc < 0.3f && chromina > 3000.f) + else if ((redyel>12000.f || skinc > 1200.f) && nsknc < 0.3f && chromina > 3000.f) chaut *= 0.3f; if (mode==0 || mode==2) {//Preview or Auto multizone @@ -2483,11 +2488,11 @@ void ImProcFunctions::calcautodn_info (float &chaut, float &delta, int Nb, int l else if (lumema < 5000.f) chaut *= 1.1f; else if (lumema > 20000.f) chaut *= 0.9f;//decrease for high light } - + if(levaut==0) {//Low denoise - if(chaut > 300.f) + if(chaut > 300.f) chaut = 0.714286f*chaut + 85.71428f; - } + } delta = maxmax-chaut; delta *= reducdelta; @@ -2532,12 +2537,12 @@ void ImProcFunctions::calcautodn_info (float &chaut, float &delta, int Nb, int l } SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &nresi, float &highresi, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread) -{ +{ if ((settings->leveldnautsimpl==1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl==0 && dnparams.C2method == "MANU")) { //nothing to do return; } - + int hei,wid; float** lumcalc; float** acalc; @@ -2550,8 +2555,8 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat {static_cast(wprofi[0][0]),static_cast(wprofi[0][1]),static_cast(wprofi[0][2])}, {static_cast(wprofi[1][0]),static_cast(wprofi[1][1]),static_cast(wprofi[1][2])}, {static_cast(wprofi[2][0]),static_cast(wprofi[2][1]),static_cast(wprofi[2][2])} - }; - + }; + lumcalc = new float*[hei]; for (int i=0; idenoiselabgamma;//gamma lab essentialy for Luminance detail - if(gamlab > 2) - gamlab = 2; int tilesize; int overlap; @@ -2627,18 +2629,18 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat float minchblue=100000000.f; int nb=0; int comptlevel=0; - - // To avoid branches in loops we access the gammatabs by pointers - LUTf *denoiseigamtab; - switch(settings->denoiselabgamma) { - case 0: denoiseigamtab = &(Color::igammatab_26_11); - break; - case 1: denoiseigamtab = &(Color::igammatab_4); - break; - default: denoiseigamtab = &(Color::igammatab_55); - break; - } - + + // To avoid branches in loops we access the gammatabs by pointers + LUTf *denoiseigamtab; + switch(settings->denoiselabgamma) { + case 0: denoiseigamtab = &(Color::igammatab_26_11); + break; + case 1: denoiseigamtab = &(Color::igammatab_4); + break; + default: denoiseigamtab = &(Color::igammatab_55); + break; + } + for (int tiletop=0; tiletop 0.) - intermred = (dnparams.redchro/10.); - else + if(dnparams.redchro > 0.) + intermred = (dnparams.redchro/10.); + else intermred = (float) dnparams.redchro/7.0;//increase slower than linear for more sensit - if(dnparams.bluechro > 0.) - intermblue =(dnparams.bluechro/10.); - else + if(dnparams.bluechro > 0.) + intermblue =(dnparams.bluechro/10.); + else intermblue = (float) dnparams.bluechro/7.0;//increase slower than linear for more sensit - realred = interm_med + intermred; - if (realred < 0.f) + realred = interm_med + intermred; + if (realred < 0.f) realred = 0.001f; - realblue = interm_med + intermblue; - if (realblue < 0.f) + realblue = interm_med + intermblue; + if (realblue < 0.f) realblue = 0.001f; //TODO: implement using AlignedBufferMP //fill tile from image; convert RGB to "luma/chroma" if (isRAW) {//image is raw; use channel differences for chroma channels -#ifdef _OPENMP -#pragma omp parallel for if(multiThread) -#endif +#ifdef _OPENMP +#pragma omp parallel for if(multiThread) +#endif for (int i=tiletop; i>1][j>>1]); - bNv = LVFU(bcalc[i>>1][j>>1]); - _mm_storeu_ps(&noisevarhue[i1>>1][j1>>1], xatan2f(bNv,aNv)); + int j1 = j - tileleft; + aNv = LVFU(acalc[i>>1][j>>1]); + bNv = LVFU(bcalc[i>>1][j>>1]); + _mm_storeu_ps(&noisevarhue[i1>>1][j1>>1], xatan2f(bNv,aNv)); _mm_storeu_ps(&noisevarchrom[i1>>1][j1>>1], _mm_max_ps(c100v,_mm_sqrt_ps(SQRV(aNv)+SQRV(bNv)))); - } + } for (; j>1][j>>1]; float bN = bcalc[i>>1][j>>1]; float cN = sqrtf(SQR(aN)+SQR(bN)); noisevarhue[i1>>1][j1>>1] = xatan2f(bN,aN); - if(cN < 100.f) + if(cN < 100.f) cN=100.f;//avoid divided by zero noisevarchrom[i1>>1][j1>>1] = cN; - } -#else + } +#else for (int j=tileleft; j>1][j>>1]; float bN = bcalc[i>>1][j>>1]; float cN = sqrtf(SQR(aN)+SQR(bN)); float hN = xatan2f(bN,aN); - if(cN < 100.f) + if(cN < 100.f) cN = 100.f;//avoid divided by zero noisevarchrom[i1>>1][j1>>1] = cN; noisevarhue[i1>>1][j1>>1] = hN; - } + } #endif - } -#ifdef _OPENMP -#pragma omp parallel for if(multiThread) -#endif - for (int i=tiletop; i>1][j>>1]; - Llum = Llum < 2.f ? 2.f : Llum; //avoid divided by zero ? - Llum = Llum > 32768.f ? 32768.f : Llum; // not strictly necessary + float Llum = lumcalc[i>>1][j>>1]; + Llum = Llum < 2.f ? 2.f : Llum; //avoid divided by zero ? + Llum = Llum > 32768.f ? 32768.f : Llum; // not strictly necessary noisevarlum[i1>>1][j1>>1]= Llum; - } - } - if (!perf){//lab mode, modification Jacques feb 2013 and july 2014 - -#ifdef _OPENMP -#pragma omp parallel for if(multiThread) -#endif + } + } + if (!perf){//lab mode, modification Jacques feb 2013 and july 2014 + +#ifdef _OPENMP +#pragma omp parallel for if(multiThread) +#endif for (int i=tiletop; ir(i,j); float G_ = gain*src->g(i,j); float B_ = gain*src->b(i,j); - - R_ = (*denoiseigamtab)[R_]; - G_ = (*denoiseigamtab)[G_]; - B_ = (*denoiseigamtab)[B_]; - + + R_ = (*denoiseigamtab)[R_]; + G_ = (*denoiseigamtab)[G_]; + B_ = (*denoiseigamtab)[B_]; + //apply gamma noise standard (slider) R_ = R_<65535.0f ? gamcurve[R_] : (Color::gamman((double)R_/65535.0, gam)*32768.0f); G_ = G_<65535.0f ? gamcurve[G_] : (Color::gamman((double)G_/65535.0, gam)*32768.0f); @@ -2784,7 +2786,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat X = X<65535.0f ? gamcurve[X] : (Color::gamma((double)X/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); Y = Y<65535.0f ? gamcurve[Y] : (Color::gamma((double)Y/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); Z = Z<65535.0f ? gamcurve[Z] : (Color::gamma((double)Z/65535.0, gam, gamthresh, gamslope, 1.0, 0.0)*32768.0f); - + labdn->a[i1][j1] = (X-Y); labdn->b[i1][j1] = (Y-Z); } @@ -2826,7 +2828,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat Color::XYZ2Lab(XL, YL, ZL, Llum, alum, blum); float kN=Llum; if(kN<2.f) kN=2.f; - if(kN>32768.f) kN=32768.f; + if(kN>32768.f) kN=32768.f; noisevarlum[i1>>1][j1>>1]=kN; float aN=alum; float bN=blum; @@ -2835,7 +2837,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat if(cN < 100.f) cN=100.f;//avoid divided by zero noisevarchrom[i1>>1][j1>>1]=cN; noisevarhue[i1>>1][j1>>1]=hN; - } + } labdn->a[i1][j1] = a; labdn->b[i1][j1] = b; } @@ -2853,31 +2855,31 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat wavelet_decomposition* adecomp; wavelet_decomposition* bdecomp; - + int schoice = 0;//shrink method - if (dnparams.smethod == "shalbi") + if (dnparams.smethod == "shalbi") schoice=2; const int levwav=5; -#ifdef _OPENMP -#pragma omp parallel sections if(multiThread) -#endif -{ -#ifdef _OPENMP -#pragma omp section -#endif -{ +#ifdef _OPENMP +#pragma omp parallel sections if(multiThread) +#endif +{ +#ifdef _OPENMP +#pragma omp section +#endif +{ adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H, levwav, 1 ); -} -#ifdef _OPENMP -#pragma omp section -#endif -{ - bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1 ); -} -} - bool autoch = dnparams.autochroma; - if(comptlevel==0) WaveletDenoiseAll_info(levwav, *adecomp, *bdecomp, noisevarlum, noisevarchrom, noisevarhue, width, height, noisevarab_r, noisevarab_b, labdn, chaut, Nb, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, schoice, autoch, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc,maxchred, maxchblue, minchred, minchblue, nb,chau ,chred, chblue, perf, multiThread);//enhance mode +} +#ifdef _OPENMP +#pragma omp section +#endif +{ + bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, levwav, 1 ); +} +} + bool autoch = dnparams.autochroma; + if(comptlevel==0) WaveletDenoiseAll_info(levwav, *adecomp, *bdecomp, noisevarlum, noisevarchrom, noisevarhue, width, height, noisevarab_r, noisevarab_b, labdn, chaut, Nb, redaut, blueaut, maxredaut, maxblueaut, minredaut, minblueaut, schoice, autoch, chromina, sigma, lumema, sigma_L, redyel, skinc, nsknc,maxchred, maxchblue, minchred, minchblue, nb,chau ,chred, chblue, perf, multiThread);//enhance mode comptlevel+=1; float chresid,chmaxredresid,chmaxblueresid; nresi=chresid; diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index 5d884cda0..9da12afea 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -30,10 +30,7 @@ #include "mytime.h" #include "../rtgui/myflatcurve.h" #include "rt_math.h" - -#ifdef __SSE2__ -#include "sleefsseavx.c" -#endif +#include "opthelper.h" #ifdef _OPENMP #include @@ -44,11 +41,7 @@ using namespace std; namespace rtengine { extern const Settings* settings; -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh) -#else -void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh) -#endif +SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh) { const int halfwin = ceil(2*radius)+1; @@ -117,8 +110,7 @@ float chromave=0.0f; // no precalculated values without SSE => calculate float HH=xatan2f(src->b[i][j],src->a[i][j]); #endif - double hr; - float chparam = float((chCurve->getVal((hr=Color::huelab_to_huehsv2(HH)))-0.5f) * 2.0f);//get C=f(H) + float chparam = float((chCurve->getVal((Color::huelab_to_huehsv2(HH)))-0.5f) * 2.0f);//get C=f(H) if(chparam > 0.f) chparam /=2.f; // reduced action if chparam > 0 chromaChfactor=1.0f+chparam; } @@ -267,11 +259,8 @@ float chromave=0.0f; if(chCurve) delete chCurve; free(fringe); } -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh) -#else -void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh) -#endif + +SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh) { const int halfwin = ceil(2*radius)+1; @@ -396,8 +385,7 @@ if( chCurve ) { // no precalculated values without SSE => calculate float HH=xatan2f(srbb[i][j],sraa[i][j]); #endif - double hr; - float chparam = float((chCurve->getVal((hr=Color::huelab_to_huehsv2(HH)))-0.5f) * 2.0f);//get C=f(H) + float chparam = float((chCurve->getVal((Color::huelab_to_huehsv2(HH)))-0.5f) * 2.0f);//get C=f(H) if(chparam > 0.f) chparam /=2.f; // reduced action if chparam > 0 chromaChfactor=1.0f+chparam; } @@ -579,24 +567,7 @@ if( chCurve ) { free(fringe); } - -#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } - -#define med3(a0,a1,a2,a3,a4,a5,a6,a7,a8,median) { \ -pp[0]=a0; pp[1]=a1; pp[2]=a2; pp[3]=a3; pp[4]=a4; pp[5]=a5; pp[6]=a6; pp[7]=a7; pp[8]=a8; \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[1]); PIX_SORT(pp[3],pp[4]); PIX_SORT(pp[6],pp[7]); \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[3]); PIX_SORT(pp[5],pp[8]); PIX_SORT(pp[4],pp[7]); \ -PIX_SORT(pp[3],pp[6]); PIX_SORT(pp[1],pp[4]); PIX_SORT(pp[2],pp[5]); \ -PIX_SORT(pp[4],pp[7]); PIX_SORT(pp[4],pp[2]); PIX_SORT(pp[6],pp[4]); \ -PIX_SORT(pp[4],pp[2]); median=pp[4];} //pp4 = median - -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom, int hotbad) -#else -void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom, int hotbad) -#endif +SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom, int hotbad) { const int halfwin = ceil(2*radius)+1; MyTime t1,t2; @@ -1002,31 +973,10 @@ float chrommed=0.f; #pragma omp parallel #endif { -#ifdef __SSE2__ - int j; - __m128 interav, interbv; - __m128 piidv = _mm_set1_ps(piid); -#endif #ifdef _OPENMP #pragma omp for #endif for(int i = 0; i < height; i++ ) { -/* -#ifdef __SSE2__ - for(j = 0; j < width-3; j+=4) { - interav = LVFU(tmaa[i][j]); - interbv = LVFU(tmbb[i][j]); - _mm_storeu_ps(&dst->h_p[i][j],(xatan2f(interbv,interav))/piidv); - _mm_storeu_ps(&dst->C_p[i][j],_mm_sqrt_ps(SQRV(interbv)+SQRV(interav))); - } - for(; j < width; j++) { - float intera = tmaa[i][j]; - float interb = tmbb[i][j]; - dst->h_p[i][j]=(xatan2f(interb,intera))/piid; - dst->C_p[i][j]=sqrt(SQR(interb)+SQR(intera)); - } -#else -*/ for(int j = 0; j < width; j++) { float intera = tmaa[i][j]; float interb = tmbb[i][j]; @@ -1042,7 +992,6 @@ float chrommed=0.f; dst->C_p[i][j]=sqrt(SQR(interb)+SQR(intera)); } } -//#endif } } @@ -1082,12 +1031,7 @@ float chrommed=0.f; } - -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom) -#else -void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom) -#endif +SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float b_l, float t_l, float t_r, float b_r, float skinprot, float chrom) { const int halfwin = ceil(2*radius)+1; MyTime t1,t2; @@ -1490,49 +1434,19 @@ float chrommed=0.f; #pragma omp parallel #endif { -#ifdef __SSE2__ - int j; - __m128 interav, interbv; - -// __m128 piidv = _mm_set1_ps(piid); -#endif #ifdef _OPENMP #pragma omp for #endif for(int i = 0; i < height; i++ ) { - /* -#ifdef __SSE2__ - for(j = 0; j < width-3; j+=4) { - interav = LVFU(tmaa[i][j]); - interbv = LVFU(tmbb[i][j]); - _mm_storeu_ps(&dst->a[i][j],(interav)); - _mm_storeu_ps(&dst->b[i][j],(interbv)); - } - - // _mm_storeu_ps(&dst->C_p[i][j],_mm_sqrt_ps(SQRV(interbv)+SQRV(interav))); - } - for(; j < width; j++) { - float intera = tmaa[i][j]; - float interb = tmbb[i][j]; - - dst->a[i][j]=(intera); - dst->b[i][j]=(interb); - } -#else -*/ for(int j = 0; j < width; j++) { float intera = tmaa[i][j]; float interb = tmbb[i][j]; - float HH=xatan2f(interb,intera); float CC=sqrt(SQR(interb/327.68)+SQR(intera/327.68f)); - // if((HH > b_l && HH < t_r) && CC < chrom && skinprot !=0.f){ if(CC < chrom && skinprot !=0.f){ - - dst->a[i][j]=intera; - dst->b[i][j]=interb; + dst->a[i][j]=intera; + dst->b[i][j]=interb; } } -//#endif } } } @@ -1572,9 +1486,4 @@ float chrommed=0.f; } - - - } -#undef PIX_SORT -#undef med3 diff --git a/rtengine/cplx_wavelet_dec.h b/rtengine/cplx_wavelet_dec.h index a39f09fce..4fd92f7b8 100644 --- a/rtengine/cplx_wavelet_dec.h +++ b/rtengine/cplx_wavelet_dec.h @@ -34,26 +34,28 @@ namespace rtengine { public: typedef float internal_type; + float *coeff0; + bool memoryAllocationFailed; private: static const int maxlevels = 10;//should be greater than any conceivable order of decimation - int lvltot, subsamp; + int lvltot, subsamp; + int numThreads; size_t m_w, m_h;//dimensions int wavfilt_len, wavfilt_offset; float *wavfilt_anal; float *wavfilt_synth; - float *coeff0; wavelet_level * wavelet_decomp[maxlevels]; public: template - wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop = 1); + wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop = 1, int numThreads = 1); ~wavelet_decomposition(); @@ -91,8 +93,8 @@ namespace rtengine { }; template - wavelet_decomposition::wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop) - : lvltot(0), subsamp(subsampling), m_w(width), m_h(height), coeff0(NULL) + wavelet_decomposition::wavelet_decomposition(E * src, int width, int height, int maxlvl, int subsampling, int skipcrop, int numThreads) + : coeff0(NULL), memoryAllocationFailed(false), lvltot(0), subsamp(subsampling), numThreads(numThreads), m_w(width), m_h(height) { //initialize wavelet filters wavfilt_len = Daub4_len; @@ -113,18 +115,32 @@ namespace rtengine { lvltot=0; E *buffer[2]; - buffer[0] = new E[(m_w/2+1)*(m_h/2+1)]; - buffer[1] = new E[(m_w/2+1)*(m_h/2+1)]; + buffer[0] = new (std::nothrow) E[(m_w/2+1)*(m_h/2+1)]; + if(buffer[0] == NULL) { + memoryAllocationFailed = true; + return; + } + buffer[1] = new (std::nothrow) E[(m_w/2+1)*(m_h/2+1)]; + if(buffer[1] == NULL) { + memoryAllocationFailed = true; + delete[] buffer[0]; + buffer[0] = NULL; + return; + } int bufferindex = 0; wavelet_decomp[lvltot] = new wavelet_level(src, buffer[bufferindex^1], lvltot/*level*/, subsamp, m_w, m_h, \ - wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop); + wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop, numThreads); + if(wavelet_decomp[lvltot]->memoryAllocationFailed) + memoryAllocationFailed = true; while(lvltot < maxlvl-1) { lvltot++; bufferindex ^= 1; wavelet_decomp[lvltot] = new wavelet_level(buffer[bufferindex], buffer[bufferindex^1]/*lopass*/, lvltot/*level*/, subsamp, \ wavelet_decomp[lvltot-1]->width(), wavelet_decomp[lvltot-1]->height(), \ - wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop); + wavfilt_anal, wavfilt_anal, wavfilt_len, wavfilt_offset, skipcrop, numThreads); + if(wavelet_decomp[lvltot]->memoryAllocationFailed) + memoryAllocationFailed = true; } coeff0 = buffer[bufferindex^1]; delete[] buffer[bufferindex]; @@ -132,7 +148,8 @@ namespace rtengine { template void wavelet_decomposition::reconstruct(E * dst) { - + if(memoryAllocationFailed) + return; // data structure is wavcoeffs[scale][channel={lo,hi1,hi2,hi3}][pixel_array] int m_w = 0; int m_h2 = 0; @@ -143,17 +160,33 @@ namespace rtengine { if(m_h2 < wavelet_decomp[lvl]->m_h2) m_h2 = wavelet_decomp[lvl]->m_h2; } - E *tmpLo = new E[m_w*m_h2]; - E *tmpHi = new E[m_w*m_h2]; + E *tmpLo = new (std::nothrow) E[m_w*m_h2]; + if(tmpLo == NULL) { + memoryAllocationFailed = true; + return; + } + + E *tmpHi = new (std::nothrow) E[m_w*m_h2]; + if(tmpHi == NULL) { + memoryAllocationFailed = true; + delete[] tmpLo; + return; + } E *buffer[2]; buffer[0] = coeff0; - buffer[1] = new E[(m_w/2+1)*(m_h/2+1)]; + buffer[1] = new (std::nothrow) E[(m_w/2+1)*(m_h/2+1)]; + if(buffer[1] == NULL) { + memoryAllocationFailed = true; + delete[] tmpHi; + delete[] tmpLo; + return; + } + int bufferindex = 0; for (int lvl=lvltot; lvl>0; lvl--) { wavelet_decomp[lvl]->reconstruct_level(tmpLo, tmpHi, buffer[bufferindex], buffer[bufferindex^1], wavfilt_synth, wavfilt_synth, wavfilt_len, wavfilt_offset); bufferindex ^= 1; - //skip /=2; } wavelet_decomp[0]->reconstruct_level(tmpLo, tmpHi, buffer[bufferindex], dst, wavfilt_synth, wavfilt_synth, wavfilt_len, wavfilt_offset); diff --git a/rtengine/cplx_wavelet_level.h b/rtengine/cplx_wavelet_level.h index dabd8c8ff..8d878dceb 100644 --- a/rtengine/cplx_wavelet_level.h +++ b/rtengine/cplx_wavelet_level.h @@ -25,6 +25,7 @@ #include #include "rt_math.h" #include "opthelper.h" +#include "stdio.h" namespace rtengine { template @@ -35,11 +36,14 @@ namespace rtengine { int lvl; // whether to subsample the output - bool subsamp_out; + bool subsamp_out; + + int numThreads; // spacing of filter taps int skip; - + + bool bigBlockOfMemory; // allocation and destruction of data storage T ** create(size_t n); void destroy(T ** subbands); @@ -68,6 +72,7 @@ namespace rtengine { void SynthesisFilterSubsampVertical (T * srcLo, T * srcHi, T * dst, float *filterLo, float *filterHi, const int taps, const int offset, const int width, const int srcheight, const int dstheight); #endif public: + bool memoryAllocationFailed; T ** wavcoeffs; // full size @@ -77,8 +82,8 @@ namespace rtengine { size_t m_w2, m_h2; template - wavelet_level(E * src, E * dst, int level, int subsamp, size_t w, size_t h, float *filterV, float *filterH, int len, int offset, int skipcrop) - : lvl(level), subsamp_out((subsamp>>level)&1), skip(1<>level)&1), numThreads(numThreads), skip(1< T ** wavelet_level::create(size_t n) { - T * data = new T[3*n]; + T * data = new (std::nothrow) T[3*n]; + if(data == NULL) { + bigBlockOfMemory = false; + } T ** subbands = new T*[4]; - for(size_t j = 1; j < 4; j++) { - subbands[j] = data + n * (j-1); + for(size_t j = 1; j < 4; j++) { + if(bigBlockOfMemory) + subbands[j] = data + n * (j-1); + else { + subbands[j] = new (std::nothrow) T[n]; + if(subbands[j] == NULL) { + printf("Couldn't allocate memory in level %d of wavelet\n",lvl); + memoryAllocationFailed = true; + } + } } return subbands; } template void wavelet_level::destroy(T ** subbands) { - if(subbands) { - delete[] subbands[1]; + if(subbands) { + if(bigBlockOfMemory) + delete[] subbands[1]; + else { + for(size_t j = 1; j < 4; j++) { + if(subbands[j] != NULL) + delete[] subbands[j]; + } + } delete[] subbands; } } @@ -192,8 +216,10 @@ namespace rtengine { /* Basic convolution code * Applies a Haar filter * - */ - + */ +#ifdef _OPENMP +#pragma omp parallel for num_threads(numThreads) if(numThreads>1) +#endif for (int k=0; k1) +#endif +{ +#ifdef _OPENMP +#pragma omp for nowait +#endif for(size_t i = 0; i < skip; i++) { for(int j=0;j @@ -360,7 +396,9 @@ namespace rtengine { // calculate coefficients int shift = skip*(taps-offset-1);//align filter with data - +#ifdef _OPENMP +#pragma omp parallel for num_threads(numThreads) if(numThreads>1) +#endif for (int k=0; k1) +#endif for(size_t i = 0; i < dstheight; i++) { int i_src = (i+shift)/2; int begin = (i+shift)%2; @@ -467,6 +508,9 @@ namespace rtengine { // calculate coefficients int shift=skip*(taps-offset-1);//align filter with data +#ifdef _OPENMP +#pragma omp parallel for num_threads(numThreads) if(numThreads>1) +#endif for(size_t i = 0; i < dstheight; i++) { int i_src = (i+shift)/2; int begin = (i+shift)%2; @@ -496,54 +540,80 @@ namespace rtengine { #ifdef __SSE2__ template template SSEFUNCTION void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { - T tmpLo[m_w] ALIGNED64; - T tmpHi[m_w] ALIGNED64; - /* filter along rows and columns */ + /* filter along rows and columns */ + float filterVarray[2*taps][4] ALIGNED64; if(subsamp_out) { - float filterVarray[2*taps][4] ALIGNED64; for(int i=0;i<2*taps;i++) { for(int j=0;j<4;j++) { filterVarray[i][j] = filterV[i]; } } + } +#ifdef _OPENMP +#pragma omp parallel num_threads(numThreads) if(numThreads>1) +#endif +{ + T tmpLo[m_w] ALIGNED64; + T tmpHi[m_w] ALIGNED64; + if(subsamp_out) { +#ifdef _OPENMP +#pragma omp for +#endif for(int row=0;row template void wavelet_level::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) { +#ifdef _OPENMP +#pragma omp parallel num_threads(numThreads) if(numThreads>1) +#endif +{ T tmpLo[m_w] ALIGNED64; T tmpHi[m_w] ALIGNED64; /* filter along rows and columns */ if(subsamp_out) { +#ifdef _OPENMP +#pragma omp for +#endif for(int row=0;row template SSEFUNCTION void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset) { + if(memoryAllocationFailed) + return; /* filter along rows and columns */ if (subsamp_out) { @@ -564,7 +634,8 @@ namespace rtengine { } #else template template void wavelet_level::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset) { - + if(memoryAllocationFailed) + return; /* filter along rows and columns */ if (subsamp_out) { SynthesisFilterSubsampHorizontal (src, wavcoeffs[1], tmpLo, filterH, filterH+taps, taps, offset, m_w2, m_w, m_h2); diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index c51f860d6..b223251dd 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -45,20 +45,6 @@ #endif #undef CLIPD #define CLIPD(a) ((a)>0.0f?((a)<1.0f?(a):1.0f):0.0f) -#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } - -#define med3(a0,a1,a2,a3,a4,a5,a6,a7,a8,median) { \ -pp[0]=a0; pp[1]=a1; pp[2]=a2; pp[3]=a3; pp[4]=a4; pp[5]=a5; pp[6]=a6; pp[7]=a7; pp[8]=a8; \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[1]); PIX_SORT(pp[3],pp[4]); PIX_SORT(pp[6],pp[7]); \ -PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ -PIX_SORT(pp[0],pp[3]); PIX_SORT(pp[5],pp[8]); PIX_SORT(pp[4],pp[7]); \ -PIX_SORT(pp[3],pp[6]); PIX_SORT(pp[1],pp[4]); PIX_SORT(pp[2],pp[5]); \ -PIX_SORT(pp[4],pp[7]); PIX_SORT(pp[4],pp[2]); PIX_SORT(pp[6],pp[4]); \ -PIX_SORT(pp[4],pp[2]); median=pp[4];} //pp4 = median - - - namespace rtengine { diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 745c41dae..80d2c8813 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -34,6 +34,19 @@ #include "cplx_wavelet_dec.h" #include "editbuffer.h" +#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } + +#define med3(a0,a1,a2,a3,a4,a5,a6,a7,a8,median) { \ +pp[0]=a0; pp[1]=a1; pp[2]=a2; pp[3]=a3; pp[4]=a4; pp[5]=a5; pp[6]=a6; pp[7]=a7; pp[8]=a8; \ +PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ +PIX_SORT(pp[0],pp[1]); PIX_SORT(pp[3],pp[4]); PIX_SORT(pp[6],pp[7]); \ +PIX_SORT(pp[1],pp[2]); PIX_SORT(pp[4],pp[5]); PIX_SORT(pp[7],pp[8]); \ +PIX_SORT(pp[0],pp[3]); PIX_SORT(pp[5],pp[8]); PIX_SORT(pp[4],pp[7]); \ +PIX_SORT(pp[3],pp[6]); PIX_SORT(pp[1],pp[4]); PIX_SORT(pp[2],pp[5]); \ +PIX_SORT(pp[4],pp[7]); PIX_SORT(pp[4],pp[2]); PIX_SORT(pp[6],pp[4]); \ +PIX_SORT(pp[4],pp[2]); median=pp[4];} //pp4 = median + + namespace rtengine { @@ -272,36 +285,30 @@ class ImProcFunctions { //void RGB_OutputTransf(LabImage * src, Imagefloat * dst, const procparams::DirPyrDenoiseParams & dnparams); //void output_tile_row (float *Lbloxrow, float ** Lhipassdn, float ** tilemask, int height, int width, int top, int blkrad ); void Tile_calc (int tilesize, int overlap, int kall, int imwidth, int imheight, int &numtiles_W, int &numtiles_H, int &tilewidth, int &tileheight, int &tileWskip, int &tileHskip); - + enum mediantype {MED_3X3SOFT, MED_3X3STRONG, MED_5X5SOFT, MED_5X5STRONG, MED_7X7, MED_9X9}; + void Median_Denoise( float **src, float **dst, int width, int height, mediantype medianType, int iterations, int numThreads, float **buffer = NULL); void RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp,const NoiseCurve & ctNoisCurve , const NoiseCurve & ctNoisCCcurve , float &chaut, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &nresi, float &highresi); void RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, const bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope); void RGB_denoise_info(Imagefloat * src, Imagefloat * calclum, bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &minredaut, float & minblueaut,float &nresi, float &highresi, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, bool multiThread = false); void RGBtile_denoise (float * fLblox, int hblproc, float noisevar_L, float * nbrwt, float * blurbuffer ); //for DCT void RGBoutput_tile_row (float *Lbloxrow, float ** Ldetail, float ** tilemask_out, int height, int width, int top ); - //void WaveletDenoise(cplx_wavelet_decomposition &DualTreeCoeffs, float noisevar ); - //void WaveletDenoise(wavelet_decomposition &WaveletCoeffs, float noisevar ); - // void WaveletDenoiseAll(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - // wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab, wavelet_decomposition &wch, NoiImage * noi ); - void WaveletDenoiseAll(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf); + bool WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float noisevar_L, float *noisevarlum, float madL[8][3], int width, int height, const bool useNoiseLCurve, bool denoiseMethodRgb); + bool WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], int width, int height, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool perf); void WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a, wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut,float &minredaut, float & minblueaut, int schoice, bool autoch, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, - float &maxchred, float &maxchblue,float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool perf, bool multiThread); + float &maxchred, float &maxchblue,float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb, bool multiThread); - void WaveletDenoiseAll_BiShrink(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, - wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb, LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf); - //void BiShrink(float * ReCoeffs, float * ImCoeffs, float * ReParents, float * ImParents, - // int W, int H, int level, int padding, float noisevar); - //void Shrink(float ** WavCoeffs, int W, int H, int level, float noisevar); - // void ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, int level, - // int W_L, int H_L, int W_ab, int H_ab, int W_h, int H_h, int skip_L, int skip_ab, int skip_h, float noisevar_L, float noisevar_ab, float **WavCoeffs_h, LabImage * noi); - - void ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, float **buffer, int level, - int W_L, int H_L, int W_ab, int H_ab, int skip_L, int skip_ab, float noisevar_L, float *noisevarlum, float *noisevarchrom, int width, int height, float noisevar_abr, float noisevar_abb,LabImage * noi, const bool useNoiseLCurve, const bool useNoiseCCurve, float &chaut, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, bool autoch, bool perf, float * madaa = NULL, float * madab = NULL, float * madaL = NULL, bool madCalculated= false); + bool WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float noisevar_L, float *noisevarlum, float madL[8][3], int width, int height, const bool useNoiseLCurve, bool denoiseMethodRgb); + bool WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], int width, int height, float noisevar_ab, + const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb); + void ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, + float noisevar_L, float *noisevarlum, int width, int height, const bool useNoiseLCurve, bool denoiseMethodRgb, float * madaL); + void ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir, + float *noisevarchrom, int width, int height, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, float * madL, float * madaab = NULL, bool madCalculated = false); void ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b, int level, int W_ab, int H_ab, int skip_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, int width, int height, float noisevar_abr, float noisevar_abb,LabImage * noi, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, bool autoch, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel,float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool perf, bool multiThread); - void Noise_residual(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_a, wavelet_decomposition &WaveletCoeffs_b, int width, int height, float &chresid, float &chmaxredresid,float &chmaxblueresid, float &chresidred, float & chresidblue, float resid, float residblue, float residred, float maxredresid, float maxblueresid, float nbresid, bool denoiseMethodRgb); + void Noise_residualAB(wavelet_decomposition &WaveletCoeffs_ab, float &chresid, float &chmaxresid, bool denoiseMethodRgb); void calcautodn_info (float &chaut, float &delta, int Nb, int levaut, float maxmax, float lumema, float chromina, int mode, int lissage, float redyel, float skinc, float nsknc); float MadMax(float * HH_Coeffs, int &max, int datalen); float Mad(float * DataList, const int datalen);