From b4fd8c5ce16100e34d25a0fed46acd73538d40de Mon Sep 17 00:00:00 2001 From: Ingo Date: Fri, 14 Mar 2014 13:56:59 +0100 Subject: [PATCH] Time reduction of Noise Reduction, Issue 1971 --- rtengine/FTblockDN.cc | 387 ++++++++++++++++++++++++++++-------------- rtengine/boxblur.h | 46 +---- rtengine/improcfun.h | 5 +- 3 files changed, 266 insertions(+), 172 deletions(-) diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index 7eb267577..6b54a22e1 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -40,10 +40,8 @@ #include "boxblur.h" #include "rt_math.h" #include "mytime.h" -#include "sleef.c" -#ifdef __SSE2__ - #include "sleefsseavx.c" -#endif +#include "sleef.c" +#include "opthelper.h" #ifdef _OPENMP #include @@ -89,7 +87,7 @@ namespace rtengine { void ImProcFunctions::RGB_denoise(Imagefloat * src, Imagefloat * dst, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const procparams::DefringeParams & defringe, const double expcomp) - { + { //#ifdef _DEBUG // MyTime t1e,t2e; // t1e.set(); @@ -98,6 +96,7 @@ namespace rtengine { static MyMutex FftwMutex; MyMutex::MyLock lock(FftwMutex); + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% /*if (plistener) { @@ -109,8 +108,8 @@ namespace rtengine { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - const short int imheight=src->height, imwidth=src->width; - + const short int imheight=src->height, imwidth=src->width; + if (dnparams.luma==0 && dnparams.chroma==0) { //nothing to do; copy src to dst memcpy(dst->data,src->data,dst->width*dst->height*3*sizeof(float)); @@ -174,7 +173,7 @@ namespace rtengine { array2D tilemask_out(TS,TS); const int border = MAX(2,TS/16); - + #ifdef _OPENMP #pragma omp parallel for #endif @@ -190,7 +189,6 @@ namespace rtengine { } } - //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // begin tile processing of image @@ -296,7 +294,10 @@ namespace rtengine { { Lblox = (float*) fftwf_malloc(max_numblox_W*TS*TS*sizeof(float)); fLblox = (float*) fftwf_malloc(max_numblox_W*TS*TS*sizeof(float)); - } + } + + float * nbrwt = new float[TS*TS]; + float * blurbuffer = new float[TS*TS]; #ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) #endif @@ -317,14 +318,10 @@ namespace rtengine { //pixel weight array2D totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks - // - //#ifdef _OPENMP - //#pragma omp parallel for - //#endif //TODO: implement using AlignedBufferMP //fill tile from image; convert RGB to "luma/chroma" if (isRAW) {//image is raw; use channel differences for chroma channels - if(!perf){//lab mode + if(!perf){//lab mode //modification Jacques feb 2013 for (int i=tiletop/*, i1=0*/; i pBuf(width + TS + 2*blkrad*offset); for (int vblk=0; vblkdata, dsttmp->data, 3*dst->width*dst->height*sizeof(float)); @@ -804,16 +790,11 @@ namespace rtengine { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#if defined( __SSE2__ ) && defined( WIN32 ) -__attribute__((force_align_arg_pointer)) void ImProcFunctions::RGBtile_denoise (float * fLblox, int vblproc, int hblproc, int numblox_H, int numblox_W, float noisevar_Ldetail ) //for DCT -#else - void ImProcFunctions::RGBtile_denoise (float * fLblox, int vblproc, int hblproc, int numblox_H, int numblox_W, float noisevar_Ldetail ) //for DCT -#endif +SSEFUNCTION void ImProcFunctions::RGBtile_denoise (float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer ) //for DCT { - float * nbrwt = new float[TS*TS]; //for DCT int blkstart = hblproc*TS*TS; - boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT + boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS, blurbuffer);//blur neighbor weights for more robust estimation //for DCT #ifdef __SSE2__ __m128 tempv; @@ -830,7 +811,6 @@ __attribute__((force_align_arg_pointer)) void ImProcFunctions::RGBtile_denoise ( fLblox[blkstart+n] *= (1-xexpf(-SQR(nbrwt[n])/noisevar_Ldetail)); }//output neighbor averaged result #endif - delete[] nbrwt; //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //printf("vblk=%d hlk=%d wsqave=%f || ",vblproc,hblproc,wsqave); @@ -851,9 +831,6 @@ __attribute__((force_align_arg_pointer)) void ImProcFunctions::RGBtile_denoise ( int imax = bottom - top; //add row of tiles to output image -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int hblk=0; hblk < numblox_W; hblk++) { int left = (hblk-blkrad)*offset; int right = MIN(left+TS, width); @@ -862,7 +839,7 @@ __attribute__((force_align_arg_pointer)) void ImProcFunctions::RGBtile_denoise ( int indx = hblk*TS; for (int i=imin; i=0; lvl--) {//for levels less than max, use level diff to make edge mask //for (int lvl=0; lvl edge(Wlvl_L,Hlvl_L); + float * sfave = new float[Wlvl_L*Hlvl_L]; + float * WavCoeffsLtemp = new float[Hlvl_ab*Wlvl_ab]; + +// array2D edge(Wlvl_L,Hlvl_L); //printf("\n level=%d \n",lvl); for (int dir=1; dir<4; dir++) { float mad_L = madL[lvl][dir-1]; float mad_a = noisevar_abr*mada[lvl][dir-1]; - float mad_b = noisevar_abb*madb[lvl][dir-1]; + float mad_b = noisevar_abb*madb[lvl][dir-1]; + + //float mad_Lpar = madL[lvl+1][dir-1]; //float mad_apar = mada[lvl+1][dir-1]; //float mad_bpar = mada[lvl+1][dir-1]; //float skip_ab_ratio = WaveletCoeffs_a.level_stride(lvl+1)/skip_ab; - float skip_L_ratio = WaveletCoeffs_L.level_stride(lvl+1)/skip_L; +// float skip_L_ratio = WaveletCoeffs_L.level_stride(lvl+1)/skip_L; if (noisevar_abr>0.01f || noisevar_abb>0.01f) { - + for(int i=0;i0.01f) { - mad_L *= noisevar_L*5/(lvl+1); -//OpenMP here + if (noisevar_L>0.01f) { + mad_L *= noisevar_L*5.f/(lvl+1); +#ifdef __SSE2__ + int j; + __m128 mad_Lv = _mm_set1_ps(mad_L); + __m128 mad_Lm9v = _mm_set1_ps(mad_L * 9.f); + __m128 epsv = _mm_set1_ps(eps); + __m128 mag_Lv; + for (int i=0; i (edge, edge, buffer, Wlvl_L, Hlvl_L, 1<<(lvl+1), false /*multiThread*/); //gaussVertical (edge, edge, buffer, Wlvl_L, Hlvl_L, 1<<(lvl+1), false); boxblur(sfave, sfave, lvl+2, lvl+2, Wlvl_L, Hlvl_L);//increase smoothness by locally averaging shrinkage -//OpenMP here + +#ifdef __SSE2__ + __m128 tempLv; + __m128 tempL2v; + __m128 sf_Lv; + + for (int i=0; i 582=max float mad_b = madb*noisevar_abb; - if (noisevar_abr>0.01f || noisevar_abb>0.01f) { -//OpenMP here + if (noisevar_abr>0.01f || noisevar_abb>0.01f) { + for(int i=0;i2*thresh_a ? 1 : (coeff_a2*thresh_b ? 1 : (coeff_b0.01f) { @@ -1225,9 +1364,6 @@ __attribute__((force_align_arg_pointer)) void ImProcFunctions::ShrinkAll(float * sfave[i] = mag/(mag+mad_L*xexpf(-mag/(9*mad_L))+eps); } #else -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int i=0; i void boxblur (T** src, A** dst, int radx, int rady, i //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#if defined( __SSE2__ ) && defined( WIN32 ) -template __attribute__((force_align_arg_pointer)) void boxblur (T* src, A* dst, int radx, int rady, int W, int H) { -#else -template void boxblur (T* src, A* dst, int radx, int rady, int W, int H) { -#endif +template SSEFUNCTION void boxblur (T* src, A* dst, int radx, int rady, int W, int H) { + //printf("boxblur\n"); //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) @@ -138,9 +133,6 @@ template void boxblur (T* src, A* dst, int radx, int rady, int } } else { //horizontal blur -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = (float)src[row*W+0]; @@ -163,9 +155,6 @@ template void boxblur (T* src, A* dst, int radx, int rady, int } if (rady==0) { -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row=0; row void boxblur (T* src, A* dst, int radx, int rady, int } } #else -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -664,32 +650,18 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -#if defined( __SSE2__ ) && defined( WIN32 ) -template __attribute__((force_align_arg_pointer)) void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H) { -#else -template void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H) { -#endif +template SSEFUNCTION void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp) { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBuffer* buffer = new AlignedBuffer (W*H); - float* temp = buffer->data; - if (radx==0) { -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, } if (rady==0) { -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, } #else - -//OpenMP here -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -792,8 +756,6 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, #endif } - delete buffer; - } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index e50bec30a..896c68a8c 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -249,7 +249,7 @@ class ImProcFunctions { //void RGB_OutputTransf(LabImage * src, Imagefloat * dst, const procparams::DirPyrDenoiseParams & dnparams); //void output_tile_row (float *Lbloxrow, float ** Lhipassdn, float ** tilemask, int height, int width, int top, int blkrad ); void RGB_denoise(Imagefloat * src, Imagefloat * dst, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const procparams::DefringeParams & defringe, const double expcomp); - void RGBtile_denoise (float * fLblox, int vblproc, int hblproc, int numblox_H, int numblox_W, float noisevar_L ); //for DCT + void RGBtile_denoise (float * fLblox, int hblproc, float noisevar_L, float * nbrwt, float * blurbuffer ); //for DCT void RGBoutput_tile_row (float *Lbloxrow, float ** Ldetail, float ** tilemask_out, int height, int width, int top ); //void WaveletDenoise(cplx_wavelet_decomposition &DualTreeCoeffs, float noisevar ); //void WaveletDenoise(wavelet_decomposition &WaveletCoeffs, float noisevar ); @@ -267,9 +267,10 @@ class ImProcFunctions { // int W_L, int H_L, int W_ab, int H_ab, int W_h, int H_h, int skip_L, int skip_ab, int skip_h, float noisevar_L, float noisevar_ab, float **WavCoeffs_h, LabImage * noi); void ShrinkAll(float ** WavCoeffs_L, float ** WavCoeffs_a, float ** WavCoeffs_b, int level, - int W_L, int H_L, int W_ab, int H_ab, int skip_L, int skip_ab, float noisevar_L, float noisevar_abr, float noisevar_abb,LabImage * noi); + int W_L, int H_L, int W_ab, int H_ab, int skip_L, int skip_ab, float noisevar_L, float noisevar_abr, float noisevar_abb,LabImage * noi, float * madaa = NULL, float * madab = NULL, float * madaL = NULL, bool madCalculated = false); float MadMax(float * HH_Coeffs, int &max, int datalen); + float Mad(float * HH_Coeffs, int datalen, int * histo); // pyramid equalizer void dirpyr_equalizer (float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, float ** dest_a, float ** dest_b, const double * mult, const double dirpyrThreshold, const double skinprot, const bool gamutlab, float b_l, float t_l, float t_r, float b_r, int choice, int scale);//Emil's directional pyramid equalizer