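OpenMP for faster implementation of pyramid schemes ;-) (parallelizes the directional pyramid denoise/equalizer loops with OpenMP; also stores the Directional Pyramid Equalizer multipliers as doubles instead of integers)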

2010-09-22 22:43:39 -05:00
parent 539c39a92b
commit bde7141939
4 changed files with 102 additions and 59 deletions
--- a/rtengine/dirpyrLab_denoise.cc
+++ b/rtengine/dirpyrLab_denoise.cc
@@ -269,9 +269,7 @@ namespace rtengine {
 		int width = data_fine->W;
 		int height = data_fine->H;
 		
-		float dirwt_l, dirwt_ab, norm_l, norm_ab;
-		//float lops,aops,bops;
-		float Lout, aout, bout;
+
 		
 		
 		//generate domain kernel 
@@ -286,7 +284,11 @@ namespace rtengine {
 		 }*/
 		//float domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,4,2,1},{1,2,2,2,1},{1,1,1,1,1}};
 		
-		for(int i = 0, i1=0; i < height; i+=pitch, i1++) {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+ 
+		for(int i = 0; i < height; i+=pitch ) { int i1=i/pitch;
 			for(int j = 0, j1=0; j < width; j+=pitch, j1++)
 			{				
 				//norm = DIRWT(i, j, i, j);
@@ -294,6 +296,9 @@ namespace rtengine {
 				//aout = -norm*data_fine->a[i][j];
 				//bout = -norm*data_fine->b[i][j];
 				//or
+				float dirwt_l, dirwt_ab, norm_l, norm_ab;
+				//float lops,aops,bops;
+				float Lout, aout, bout;
 				norm_l = norm_ab = 0;//if we do want to include the input pixel in the sum
 				Lout = 0;
 				aout = 0;
@@ -339,9 +344,6 @@ namespace rtengine {
 		int height = data_fine->H;
 		
 		//float eps = 0.0;
-		double wtdsum[3], norm;
-		float hipass[3], hpffluct[3], tonefactor, nrfactor;
-		int i, j, ix, jx;	
 		
 		// c[0] noise_L
 		// c[1] noise_ab (relative to noise_L)
@@ -387,9 +389,14 @@ namespace rtengine {
 		if (pitch==1) { 
 			
 			// step (1-2-3-4) 
-			for( i = 0; i < height; i++)
-				for( j = 0; j < width; j++) {
-					
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+			for(int  i = 0; i < height; i++)
+				for(int  j = 0; j < width; j++) {
+					double wtdsum[3], norm;
+					float hipass[3], hpffluct[3], tonefactor, nrfactor;
+				
 					tonefactor = ((NRWT_L(data_coarse->L[i][j])));
 					
 					//Wiener filter
@@ -420,25 +427,37 @@ namespace rtengine {
 			LabImage* smooth;
 			
 			smooth = new LabImage(width, height);
-			
-			for( i = 0, ix=0; i < height; i+=pitch, ix++)
-				for( j = 0, jx=0; j < width; j+=pitch, jx++) {
+#ifdef _OPENMP
+#pragma omp parallel
+#endif
+
+{
+		
+#ifdef _OPENMP
+#pragma omp for
+#endif
+			for(int  i = 0; i < height; i+=pitch)
+			{
+				int ix=i/pitch;
+				for(int  j = 0, jx=0; j < width; j+=pitch, jx++) {
 					
 					//copy common pixels
 					smooth->L[i][j] = data_coarse->L[ix][jx];
 					smooth->a[i][j] = data_coarse->a[ix][jx];
 					smooth->b[i][j] = data_coarse->b[ix][jx];
 				}
-			
+			}
 			//if (pitch>1) {//pitch=2; step (1) expand coarse image, fill in missing data
-			
-			for( i = 0; i < height-1; i+=2)
-				for( j = 0; j < width-1; j+=2) {
+#ifdef _OPENMP
+#pragma omp for	
+#endif
+			for(int  i = 0; i < height-1; i+=2)
+				for(int j = 0; j < width-1; j+=2) {
 					//do midpoint first
-					norm=0;
-					wtdsum[0]=wtdsum[1]=wtdsum[2]=0.0;
-					for( ix=i; ix<MIN(height,i+3); ix+=2)
-						for ( jx=j; jx<MIN(width,j+3); jx+=2) {
+					double norm=0.0,wtdsum[3]={0.0,0.0,0.0};
+					//wtdsum[0]=wtdsum[1]=wtdsum[2]=0.0;
+					for(int ix=i; ix<MIN(height,i+3); ix+=2)
+						for (int jx=j; jx<MIN(width,j+3); jx+=2) {
 							wtdsum[0] += smooth->L[ix][jx];
 							wtdsum[1] += smooth->a[ix][jx];
 							wtdsum[2] += smooth->b[ix][jx];
@@ -449,20 +468,23 @@ namespace rtengine {
 					smooth->a[i+1][j+1]=wtdsum[1]*norm;
 					smooth->b[i+1][j+1]=wtdsum[2]*norm;
 				}
+#ifdef _OPENMP
+#pragma omp for
+#endif
 			
-			for( i = 0; i < height-1; i+=2)
-				for( j = 0; j < width-1; j+=2) {
+			for(int i = 0; i < height-1; i+=2)
+				for(int j = 0; j < width-1; j+=2) {
 					//now right neighbor
 					if (j+1==width) continue;
-					norm=0;
-					wtdsum[0]=wtdsum[1]=wtdsum[2]=0.0;
-					for (jx=j; jx<MIN(width,j+3); jx+=2) {
+					double norm=0.0,wtdsum[3]={0.0,0.0,0.0};
+
+					for (int jx=j; jx<MIN(width,j+3); jx+=2) {
 						wtdsum[0] += smooth->L[i][jx];
 						wtdsum[1] += smooth->a[i][jx];
 						wtdsum[2] += smooth->b[i][jx];
 						norm++;
 					}
-					for (ix=MAX(0,i-1); ix<MIN(height,i+2); ix+=2) {
+					for (int ix=MAX(0,i-1); ix<MIN(height,i+2); ix+=2) {
 						wtdsum[0] += smooth->L[ix][j+1];
 						wtdsum[1] += smooth->a[ix][j+1];
 						wtdsum[2] += smooth->b[ix][j+1];
@@ -475,15 +497,14 @@ namespace rtengine {
 					
 					//now down neighbor
 					if (i+1==height) continue;
-					norm=0;
-					wtdsum[0]=wtdsum[1]=wtdsum[2]=0.0;
-					for (ix=i; ix<MIN(height,i+3); ix+=2) {
+					norm=0.0;wtdsum[0]=wtdsum[1]=wtdsum[2]=0.0;
+					for (int ix=i; ix<MIN(height,i+3); ix+=2) {
 						wtdsum[0] += smooth->L[ix][j];
 						wtdsum[1] += smooth->a[ix][j];
 						wtdsum[2] += smooth->b[ix][j];
 						norm++;
 					}
-					for (jx=MAX(0,j-1); jx<MIN(width,j+2); jx+=2) {
+					for (int jx=MAX(0,j-1); jx<MIN(width,j+2); jx+=2) {
 						wtdsum[0] += smooth->L[i+1][jx];
 						wtdsum[1] += smooth->a[i+1][jx];
 						wtdsum[2] += smooth->b[i+1][jx];
@@ -496,13 +517,17 @@ namespace rtengine {
 					
 				}
 			
-			
+#ifdef _OPENMP
+#pragma omp for	
+#endif
+		
 			// step (2-3-4) 
-			for( i = 0; i < height; i++)
-				for( j = 0; j < width; j++) {
-					
-					tonefactor = ((NRWT_L(smooth->L[i][j])));
+			for( int i = 0; i < height; i++)
+				for(int j = 0; j < width; j++) {
 					
+					double tonefactor = ((NRWT_L(smooth->L[i][j])));
+					//double wtdsum[3], norm;
+					float hipass[3], hpffluct[3],  nrfactor;
 					//Wiener filter
 					//luma
 					if (level<2) {
@@ -525,7 +550,7 @@ namespace rtengine {
 					data_fine->a[i][j] = hipass[1]+smooth->a[i][j];
 					data_fine->b[i][j] = hipass[2]+smooth->b[i][j];
 				}
-			
+}	// end parallel		
 			delete smooth;
 		}//end of pitch>1
 		
--- a/rtengine/dirpyrLab_equalizer.cc
+++ b/rtengine/dirpyrLab_equalizer.cc
@@ -288,8 +288,7 @@ namespace rtengine {
 		int width = data_fine->W;
 		int height = data_fine->H;
 		
-		float Lout, aout, bout;
-		float dirwt, norm;
+
 		
 		//generate domain kernel 
 		int halfwin = 1;//MIN(ceil(2*sig),3);
@@ -304,11 +303,14 @@ namespace rtengine {
 		//float domker[5][5] = {{1,1,1,1,1},{1,2,2,2,1},{1,2,4,2,1},{1,2,2,2,1},{1,1,1,1,1}};
 		
 		//float domker[3][3] = {{1,1,1},{1,2,1},{1,1,1}};
-		
-		
-		for(int i = 0, i1=0; i < height; i+=pitch, i1++) {
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
+		for(int i = 0; i < height; i+=pitch) { int i1=i/pitch;
 			for(int j = 0, j1=0; j < width; j+=pitch, j1++)
 			{				
+				float Lout, aout, bout;
+				float norm;
 				norm = 0;//if we do want to include the input pixel in the sum
 				Lout = 0;
 				aout = 0;
@@ -316,7 +318,7 @@ namespace rtengine {
 				
 				for(int inbr=MAX(0,i-scalewin); inbr<=MIN(height-1,i+scalewin); inbr+=scale) {
 					for (int jnbr=MAX(0,j-scalewin); jnbr<=MIN(width-1,j+scalewin); jnbr+=scale) {
-						dirwt = DIRWT(inbr, jnbr, i, j);
+						float dirwt = DIRWT(inbr, jnbr, i, j);
 						Lout += dirwt*data_fine->L[inbr][jnbr];
 						aout += dirwt*data_fine->a[inbr][jnbr];
 						bout += dirwt*data_fine->b[inbr][jnbr];
@@ -382,18 +384,21 @@ namespace rtengine {
 		
 		if (pitch==1) {
 			// step (1-2-3-4) 
+#ifdef _OPENMP
+#pragma omp parallel for
+#endif
 			for(int i = 0; i < height; i++)
 				for(int j = 0; j < width; j++) {
 					
 					//luma
-					hipass[0] = (float)data_fine->L[i][j]-data_coarse->L[i][j];
-					buffer[0][i*scale][j*scale] += hipass[0] * lumamult[level];//*luma;
+					float hipass0 = (float)data_fine->L[i][j]-data_coarse->L[i][j];
+					buffer[0][i*scale][j*scale] += hipass0 * lumamult[level];//*luma;
 					
 					//chroma
-					hipass[1] = data_fine->a[i][j]-data_coarse->a[i][j];
-					hipass[2] = data_fine->b[i][j]-data_coarse->b[i][j];
-					buffer[1][i*scale][j*scale] += hipass[1] * chromamult[level]; //*chroma;
-					buffer[2][i*scale][j*scale] += hipass[2] * chromamult[level]; //*chroma;
+					float hipass1 = data_fine->a[i][j]-data_coarse->a[i][j];
+					float hipass2 = data_fine->b[i][j]-data_coarse->b[i][j];
+					buffer[1][i*scale][j*scale] += hipass1 * chromamult[level]; //*chroma;
+					buffer[2][i*scale][j*scale] += hipass2 * chromamult[level]; //*chroma;
 				}
 			
 		} else {
@@ -402,10 +407,16 @@ namespace rtengine {
 			//if (pitch>1), pitch=2; expand coarse image, fill in missing data
 			
 			LabImage* smooth;
-			
 			smooth = new LabImage(width, height);
-			
-			for(int i = 0, i2=0; i < height; i+=pitch, i2++)
+#ifdef _OPENMP
+#pragma	omp parallel
+#endif
+
+{
+#ifdef _OPENMP
+#pragma omp for
+#endif
+			for(int i = 0; i < height; i+=pitch){ int i2=i/pitch;
 				for(int j = 0, j2=0; j < width; j+=pitch, j2++) {
 					
 					//copy common pixels
@@ -413,9 +424,11 @@ namespace rtengine {
 					smooth->a[i][j] = data_coarse->a[i2][j2];
 					smooth->b[i][j] = data_coarse->b[i2][j2];
 				}
-			//}
+			}
 						
-			
+#ifdef _OPENMP
+#pragma omp for
+#endif
 			for(int i = 0; i < height-1; i+=2)
 				for(int j = 0; j < width-1; j+=2) {
 					//do midpoint first
@@ -440,7 +453,9 @@ namespace rtengine {
 					buffer[1][(i+1)*scale][(j+1)*scale]=wtdsum[4]*norm;
 					buffer[2][(i+1)*scale][(j+1)*scale]=wtdsum[5]*norm;
 				}
-			
+#ifdef _OPENMP
+#pragma omp for
+#endif
 			for(int i = 0; i < height-1; i+=2)
 				for(int j = 0; j < width-1; j+=2) {
 					//now right neighbor
@@ -511,6 +526,9 @@ namespace rtengine {
 			
 			
 			// step (2-3-4) 
+#ifdef _OPENMP
+#pragma omp for
+#endif
 			for(int i = 0; i < height; i++)
 				for(int j = 0; j < width; j++) {
 					
@@ -524,7 +542,7 @@ namespace rtengine {
 					buffer[1][i*scale][j*scale] += hipass[1] * chromamult[level]; //*chroma;
 					buffer[2][i*scale][j*scale] += hipass[2] * chromamult[level]; //*chroma;
 				}
-			
+}	// end parallel		
 			delete smooth;
 			
 		}
--- a/rtengine/procparams.cc
+++ b/rtengine/procparams.cc
@@ -350,7 +350,7 @@ int ProcParams::save (Glib::ustring fname) const {
    {
        std::stringstream ss;
        ss << "Mult" << i;
-        keyFile.set_integer("Directional Pyramid Equalizer", ss.str(), dirpyrequalizer.mult[i]);
+        keyFile.set_double("Directional Pyramid Equalizer", ss.str(), dirpyrequalizer.mult[i]);
    }

    // save exif change list
@@ -611,7 +611,7 @@ if (keyFile.has_group ("Directional Pyramid Equalizer")) {
 	{
 		std::stringstream ss;
 		ss << "Mult" << i;
-		if(keyFile.has_key ("Directional Pyramid Equalizer", ss.str())) dirpyrequalizer.mult[i] = keyFile.get_integer ("Directional Pyramid Equalizer", ss.str());
+		if(keyFile.has_key ("Directional Pyramid Equalizer", ss.str())) dirpyrequalizer.mult[i] = keyFile.get_double ("Directional Pyramid Equalizer", ss.str());
 	}
 }

--- a/rtgui/dirpyrequalizer.cc
+++ b/rtgui/dirpyrequalizer.cc
@@ -146,7 +146,7 @@ void DirPyrEqualizer::write (ProcParams* pp, ParamsEdited* pedited) {
    pp->dirpyrequalizer.enabled = enabled->get_active ();

    for (int i = 0; i < 8; i++) {
-        pp->dirpyrequalizer.mult[i] = (int) multiplier[i]->getValue();
+        pp->dirpyrequalizer.mult[i] = multiplier[i]->getValue();
    }

    if (pedited) {