From 0c0ffd34d9d66cc917fed506d008c51afc2cf84b Mon Sep 17 00:00:00 2001
From: Emil Martinec <ejm.60657@gmail.com>
Date: Mon, 20 Dec 2010 19:07:09 -0600
Subject: [PATCH] Improvement to pyramid denoise to use neighborhood median of
 denoise strength.

---
 rtengine/dirpyrLab_denoise.cc | 252 +++++++++++++++++++++++-----------
 1 file changed, 169 insertions(+), 83 deletions(-)

diff --git a/rtengine/dirpyrLab_denoise.cc b/rtengine/dirpyrLab_denoise.cc
index 7d9efdb17..a1bc88bcf 100644
--- a/rtengine/dirpyrLab_denoise.cc
+++ b/rtengine/dirpyrLab_denoise.cc
@@ -24,6 +24,7 @@
 #include <curves.h>
 #include <labimage.h>
 #include <improcfun.h>
+#include <gauss.h>
 
 #ifdef _OPENMP
 #include <omp.h>
@@ -36,9 +37,9 @@
 
 
 
-#define DIRWT_L(i1,j1,i,j) (/*domker[(i1-i)/scale+halfwin][(j1-j)/scale+halfwin] */  rangefn_L[(int)(data_fine->L[i1][j1]-data_fine->L[i][j]+0x10000)] )
+#define DIRWT_L(i1,j1,i,j) (rangefn_L[(int)(data_fine->L[i1][j1]-data_fine->L[i][j]+0x10000)] )
 
-#define DIRWT_AB(i1,j1,i,j) ( /*domker[(i1-i)/scale+halfwin][(j1-j)/scale+halfwin]*/ rangefn_ab[(int)(data_fine->a[i1][j1]-data_fine->a[i][j]+0x10000)] *  \
+#define DIRWT_AB(i1,j1,i,j) (rangefn_ab[(int)(data_fine->a[i1][j1]-data_fine->a[i][j]+0x10000)] *  \
 rangefn_ab[(int)(data_fine->L[i1][j1]-data_fine->L[i][j]+0x10000)] * \
 rangefn_ab[(int)(data_fine->b[i1][j1]-data_fine->b[i][j]+0x10000)] )
 
@@ -46,6 +47,18 @@ rangefn_ab[(int)(data_fine->b[i1][j1]-data_fine->b[i][j]+0x10000)] )
 
 #define NRWT_AB (nrwt_ab[(int)((hipass[1]+0x10000))] * nrwt_ab[(int)((hipass[2]+0x10000))])
 
+#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} }
+
+#define med3x3(a0,a1,a2,a3,a4,a5,a6,a7,a8,median) { \
+p[0]=a0; p[1]=a1; p[2]=a2; p[3]=a3; p[4]=a4; p[5]=a5; p[6]=a6; p[7]=a7; p[8]=a8; \
+PIX_SORT(p[1],p[2]); PIX_SORT(p[4],p[5]); PIX_SORT(p[7],p[8]); \
+PIX_SORT(p[0],p[1]); PIX_SORT(p[3],p[4]); PIX_SORT(p[6],p[7]); \
+PIX_SORT(p[1],p[2]); PIX_SORT(p[4],p[5]); PIX_SORT(p[7],p[8]); \
+PIX_SORT(p[0],p[3]); PIX_SORT(p[5],p[8]); PIX_SORT(p[4],p[7]); \
+PIX_SORT(p[3],p[6]); PIX_SORT(p[1],p[4]); PIX_SORT(p[2],p[5]); \
+PIX_SORT(p[4],p[7]); PIX_SORT(p[4],p[2]); PIX_SORT(p[6],p[4]); \
+PIX_SORT(p[4],p[2]); median=p[4];} //a4 is the median
+
 
 
 namespace rtengine {
@@ -269,20 +282,9 @@ namespace rtengine {
 		int width = data_fine->W;
 		int height = data_fine->H;
 		
-
-		
-		
-		//generate domain kernel 
 		int halfwin = 3;//MIN(ceil(2*sig),3);
 		int scalewin = halfwin*scale;
-		//int intfactor = 16384;
-		
-		/*float domker[7][7];
-		 for (int i=-halfwin; i<=halfwin; i++)
-		 for (int j=-halfwin; j<=halfwin; j++) {
-		 domker[i+halfwin][j+halfwin] = (int)(exp(-(i*i+j*j)/(2*sig*sig))*intfactor); //or should we use a value that depends on sigma???
-		 }*/
-		//float domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,4,2,1},{1,2,2,2,1},{1,1,1,1,1}};
+
 		
 #ifdef _OPENMP
 #pragma omp parallel for
@@ -291,19 +293,12 @@ namespace rtengine {
 		for(int i = 0; i < height; i+=pitch ) { int i1=i/pitch;
 			for(int j = 0, j1=0; j < width; j+=pitch, j1++)
 			{				
-				//norm = DIRWT(i, j, i, j);
-				//Lout = -norm*data_fine->L[i][j];//if we don't want to include the input pixel in the sum
-				//aout = -norm*data_fine->a[i][j];
-				//bout = -norm*data_fine->b[i][j];
-				//or
 				float dirwt_l, dirwt_ab, norm_l, norm_ab;
-				//float lops,aops,bops;
 				float Lout, aout, bout;
 				norm_l = norm_ab = 0;//if we do want to include the input pixel in the sum
 				Lout = 0;
 				aout = 0;
 				bout = 0;
-				//normab = 0;
 				
 				for(int inbr=MAX(0,i-scalewin); inbr<=MIN(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=MAX(0,j-scalewin); jnbr<=MIN(width-1,j+scalewin); jnbr+=scale) {
@@ -316,14 +311,7 @@ namespace rtengine {
 						norm_ab += dirwt_ab;
 					}
 				}
-				//lops = Lout/norm;//diagnostic
-				//aops = aout/normab;//diagnostic
-				//bops = bout/normab;//diagnostic
-				
-				//data_coarse->L[i1][j1]=0.5*(data_fine->L[i][j]+Lout/norm_l);//low pass filter
-				//data_coarse->a[i1][j1]=0.5*(data_fine->a[i][j]+aout/norm_ab);
-				//data_coarse->b[i1][j1]=0.5*(data_fine->b[i][j]+bout/norm_ab);
-				//or
+
 				data_coarse->L[i1][j1]=Lout/norm_l;//low pass filter
 				data_coarse->a[i1][j1]=aout/norm_ab;
 				data_coarse->b[i1][j1]=bout/norm_ab;
@@ -343,6 +331,9 @@ namespace rtengine {
 		int width = data_fine->W;
 		int height = data_fine->H;
 		
+		float radius = 1.5;
+		float temp, median;
+		
 		//float eps = 0.0;
 		
 		// c[0] noise_L
@@ -354,21 +345,20 @@ namespace rtengine {
 		float noisevar_L = 4*SQR(25.0 * luma);
 		float noisevar_ab = 2*SQR(100.0 * chroma);
 		float scalefactor = 1.0/pow(2.0,(level+1)*2);//change the last 2 to 1 for longer tail of higher scale NR
-		//float recontrast = (1+((float)(c[6])/100.0));
-		//float resaturate = 10*(1+((float)(c[7])/100.0));
 		noisevar_L *= scalefactor;
 		
-		//int halfwin = 3;//MIN(ceil(2*sig),3);
-		//int intfactor= 16384; 
-		//int winwidth=1+2*halfwin;//this belongs in calling function
-		/*float domker[7][7];
-		 for (int i=-halfwin; i<=halfwin; i++)
-		 for (int j=-halfwin; j<=halfwin; j++) {
-		 domker[i][j] = (int)(exp(-(i*i+j*j)/(2*sig*sig))*intfactor); //or should we use a value that depends on sigma???
-		 }*/
-		//float domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,4,2,1},{1,2,2,2,1},{1,1,1,1,1}};
-		
-		
+		//temporary array to store NR factors
+		float** nrfactorL = new float*[height];
+		float** nrfactorab = new float*[height];
+		for (int i=0; i<height; i++) {
+			nrfactorL[i] = new float[width];
+			nrfactorab[i] = new float[width];
+		}
+		/*float (*nrfactorL);
+		float (*nrfactorab);
+
+		nrfactorL = (float (*)) calloc ((height)*(width), sizeof(float));
+		nrfactorab = (float (*)) calloc ((height)*(width), sizeof(float));*/
 		
 		// for coarsest level, take non-subsampled lopass image and subtract from lopass_fine to generate hipass image
 		
@@ -386,16 +376,16 @@ namespace rtengine {
 		
 		// step (1)
 		
-		if (pitch==1) { 
+		if (pitch==1) {// step (1) not needed
 			
-			// step (1-2-3-4) 
+			// step (2-3-4) 
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
 			for(int  i = 0; i < height; i++)
 				for(int  j = 0; j < width; j++) {
-					double wtdsum[3], norm;
-					float hipass[3], hpffluct[3], tonefactor, nrfactor;
+					
+					float hipass[3], hpffluct[3], tonefactor;//, nrfactor;
 				
 					tonefactor = ((NRWT_L(data_coarse->L[i][j])));
 					
@@ -404,8 +394,9 @@ namespace rtengine {
 					if (level<2) {
 						hipass[0] = data_fine->L[i][j]-data_coarse->L[i][j];
 						hpffluct[0]=SQR(hipass[0])+0.001;
-						hipass[0] *= hpffluct[0]/(hpffluct[0]+noisevar_L);
-						data_fine->L[i][j] = CLIP(hipass[0]+data_coarse->L[i][j]);
+						nrfactorL[i][j] = hpffluct[0]/(hpffluct[0]+noisevar_L);
+						//hipass[0] *= hpffluct[0]/(hpffluct[0]+noisevar_L);
+						//data_fine->L[i][j] = CLIP(hipass[0]+data_coarse->L[i][j]);
 					}
 					
 					//chroma
@@ -413,20 +404,62 @@ namespace rtengine {
 					hipass[2] = data_fine->b[i][j]-data_coarse->b[i][j];
 					hpffluct[1]=SQR(hipass[1]*tonefactor)+0.001;
 					hpffluct[2]=SQR(hipass[2]*tonefactor)+0.001;
-					nrfactor = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
+					nrfactorab[i][j] = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
+					/*nrfactor = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
 
 					hipass[1] *= nrfactor;
 					hipass[2] *= nrfactor;
 					
+					data_fine->a[i][j] = hipass[1]+data_coarse->a[i][j];
+					data_fine->b[i][j] = hipass[2]+data_coarse->b[i][j];*/
+				}
+			
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+			{
+				AlignedBuffer<double>* buffer = new AlignedBuffer<double> (MAX(width,height));
+				//gaussHorizontal<float> (nrfactorL, nrfactorL, buffer, width, height, radius, multiThread);
+				gaussHorizontal<float> (nrfactorab, nrfactorab, buffer, width, height, radius, multiThread);
+				//gaussVertical<float>   (nrfactorL, nrfactorL, buffer, width, height, radius, multiThread);
+				gaussVertical<float>   (nrfactorab, nrfactorab, buffer, width, height, radius, multiThread);
+				delete buffer;
+			}
+			
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+			for(int  i = 0; i < height; i++)
+				for(int  j = 0; j < width; j++) {
+					float hipass[3],p[9];
+					
+					//luma
+					if (level<2) {
+						if (i>0 && i<height-1 && j>0 && j<width-1) {
+							med3x3(nrfactorL[i-1][j-1], nrfactorL[i-1][j], nrfactorL[i-1][j+1], \
+								   nrfactorL[i][j-1], nrfactorL[i][j], nrfactorL[i][j+1], \
+								   nrfactorL[i+1][j-1], nrfactorL[i+1][j], nrfactorL[i+1][j+1], median);
+						} else {
+							median = nrfactorL[i][j];
+						}
+						hipass[0] = median*(data_fine->L[i][j]-data_coarse->L[i][j]);
+						//hipass[0] = nrfactorL[i][j]*(data_fine->L[i][j]-data_coarse->L[i][j]);
+						data_fine->L[i][j] = CLIP(hipass[0]+data_coarse->L[i][j]);
+					}
+					
+					//chroma
+					hipass[1] = nrfactorab[i][j]*(data_fine->a[i][j]-data_coarse->a[i][j]);
+					hipass[2] = nrfactorab[i][j]*(data_fine->b[i][j]-data_coarse->b[i][j]);
+					 
 					data_fine->a[i][j] = hipass[1]+data_coarse->a[i][j];
 					data_fine->b[i][j] = hipass[2]+data_coarse->b[i][j];
 				}
 			
-		} else {
+		} else {//pitch >1; need to fill in data by upsampling
 			
 			LabImage* smooth;
-			
 			smooth = new LabImage(width, height);
+			
 #ifdef _OPENMP
 #pragma omp parallel
 #endif
@@ -516,44 +549,97 @@ namespace rtengine {
 					smooth->b[i+1][j]=wtdsum[2]*norm;
 					
 				}
-			
-#ifdef _OPENMP
-#pragma omp for	
-#endif
 		
-			// step (2-3-4) 
-			for( int i = 0; i < height; i++)
-				for(int j = 0; j < width; j++) {
-					
-					double tonefactor = ((NRWT_L(smooth->L[i][j])));
-					//double wtdsum[3], norm;
-					float hipass[3], hpffluct[3],  nrfactor;
-					//Wiener filter
-					//luma
-					if (level<2) {
-						hipass[0] = data_fine->L[i][j]-smooth->L[i][j];
-						hpffluct[0]=SQR(hipass[0])+0.001;
-						hipass[0] *= hpffluct[0]/(hpffluct[0]+noisevar_L);
-						data_fine->L[i][j] = CLIP(hipass[0]+smooth->L[i][j]);
-					}
-					
-					//chroma
-					hipass[1] = data_fine->a[i][j]-smooth->a[i][j];
-					hipass[2] = data_fine->b[i][j]-smooth->b[i][j];
-					hpffluct[1]=SQR(hipass[1]*tonefactor)+0.001;
-					hpffluct[2]=SQR(hipass[2]*tonefactor)+0.001;
-					nrfactor = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
-
-					hipass[1] *= nrfactor;
-					hipass[2] *= nrfactor;
-					
-					data_fine->a[i][j] = hipass[1]+smooth->a[i][j];
-					data_fine->b[i][j] = hipass[2]+smooth->b[i][j];
+	
+	// step (2-3-4) 
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+	for(int  i = 0; i < height; i++)
+		for(int  j = 0; j < width; j++) {
+			
+			float hipass[3], hpffluct[3], tonefactor;//, nrfactor;
+			
+			tonefactor = (NRWT_L(smooth->L[i][j]));
+			
+			//Wiener filter
+			//luma
+			if (level<2) {
+				hipass[0] = data_fine->L[i][j]-smooth->L[i][j];
+				hpffluct[0]=SQR(hipass[0])+0.001;
+				nrfactorL[i][j] = hpffluct[0]/(hpffluct[0]+noisevar_L);
+				//hipass[0] *= hpffluct[0]/(hpffluct[0]+noisevar_L);
+				//data_fine->L[i][j] = CLIP(hipass[0]+smooth->L[i][j]);
+			}
+			
+			//chroma
+			hipass[1] = data_fine->a[i][j]-smooth->a[i][j];
+			hipass[2] = data_fine->b[i][j]-smooth->b[i][j];
+			hpffluct[1]=SQR(hipass[1]*tonefactor)+0.001;
+			hpffluct[2]=SQR(hipass[2]*tonefactor)+0.001;
+			nrfactorab[i][j] = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
+			/*nrfactor = (hpffluct[1]+hpffluct[2]) /((hpffluct[1]+hpffluct[2]) + noisevar_ab * NRWT_AB);
+			 
+			 hipass[1] *= nrfactor;
+			 hipass[2] *= nrfactor;
+			 
+			 data_fine->a[i][j] = hipass[1]+smooth->a[i][j];
+			 data_fine->b[i][j] = hipass[2]+smooth->b[i][j];*/
+		}
+	
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+	{
+		AlignedBuffer<double>* buffer = new AlignedBuffer<double> (MAX(width,height));
+		//gaussHorizontal<float> (nrfactorL, nrfactorL, buffer, width, height, radius, multiThread);
+		gaussHorizontal<float> (nrfactorab, nrfactorab, buffer, width, height, radius, multiThread);
+		//gaussVertical<float>   (nrfactorL, nrfactorL, buffer, width, height, radius, multiThread);
+		gaussVertical<float>   (nrfactorab, nrfactorab, buffer, width, height, radius, multiThread);
+		delete buffer;
+	}
+	
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+	for(int  i = 0; i < height; i++)
+		for(int  j = 0; j < width; j++) {
+			float hipass[3],p[9];
+			
+			//luma
+			if (level<2) {
+				if (i>0 && i<height-1 && j>0 && j<width-1) {
+					med3x3(nrfactorL[i-1][j-1], nrfactorL[i-1][j], nrfactorL[i-1][j+1], \
+						   nrfactorL[i][j-1], nrfactorL[i][j], nrfactorL[i][j+1], \
+						   nrfactorL[i+1][j-1], nrfactorL[i+1][j], nrfactorL[i+1][j+1], median);
+				} else {
+					median = nrfactorL[i][j];
 				}
+				hipass[0] = median*(data_fine->L[i][j]-smooth->L[i][j]);
+				//hipass[0] = nrfactorL[i][j]*(data_fine->L[i][j]-smooth->L[i][j]);
+				data_fine->L[i][j] = CLIP(hipass[0]+smooth->L[i][j]);
+			}
+			
+			//chroma
+			hipass[1] = nrfactorab[i][j]*(data_fine->a[i][j]-smooth->a[i][j]);
+			hipass[2] = nrfactorab[i][j]*(data_fine->b[i][j]-smooth->b[i][j]);
+			
+			data_fine->a[i][j] = hipass[1]+smooth->a[i][j];
+			data_fine->b[i][j] = hipass[2]+smooth->b[i][j];
+		}
+	
+	
 }	// end parallel		
 			delete smooth;
 		}//end of pitch>1
 		
+		for (int i=0; i<height; i++) {
+			delete [] nrfactorL[i];
+			delete [] nrfactorab[i];
+		}
+		delete [] nrfactorL;
+		delete [] nrfactorab;
+		
 	};