OpenMP modifications in FTblockDN.cc (backing out the changes that led to artifacts)

More work is needed here to utilise AlignedBufferMP (see the TODO comments at the disabled OpenMP pragmas).
This commit is contained in:
michael
2012-07-07 14:32:17 -04:00
parent bc46f0a871
commit 9735b3ff46

View File

@@ -205,6 +205,7 @@ namespace rtengine {
//now we have tile dimensions, overlaps //now we have tile dimensions, overlaps
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//adding omp here slows it down
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) { for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) { for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
@@ -222,10 +223,15 @@ namespace rtengine {
//pixel weight //pixel weight
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
// OMP candidate? //#ifdef _OPENMP
//#pragma omp parallel for
//#endif
//TODO: implement using AlignedBufferMP
//fill tile from image; convert RGB to "luma/chroma" //fill tile from image; convert RGB to "luma/chroma"
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) for (int i=tiletop/*, i1=0*/; i<tilebottom; i++/*, i1++*/) {
for (int j=tileleft, j1=0; j<tileright; j++, j1++) { int i1 = i - tiletop;
for (int j=tileleft/*, j1=0*/; j<tileright; j++/*, j1++*/) {
int j1 = j - tileleft;
float X = gain*src->r[i][j];//xyz_prophoto[0][0]*src->r[i][j] + xyz_prophoto[0][1]*src->g[i][j] + xyz_prophoto[0][2]*src->b[i][j]; float X = gain*src->r[i][j];//xyz_prophoto[0][0]*src->r[i][j] + xyz_prophoto[0][1]*src->g[i][j] + xyz_prophoto[0][2]*src->b[i][j];
float Y = gain*src->g[i][j];//xyz_prophoto[1][0]*src->r[i][j] + xyz_prophoto[1][1]*src->g[i][j] + xyz_prophoto[1][2]*src->b[i][j]; float Y = gain*src->g[i][j];//xyz_prophoto[1][0]*src->r[i][j] + xyz_prophoto[1][1]*src->g[i][j] + xyz_prophoto[1][2]*src->b[i][j];
@@ -243,6 +249,7 @@ namespace rtengine {
Lin[i1][j1] = Y; Lin[i1][j1] = Y;
totwt[i1][j1] = 0; totwt[i1][j1] = 0;
} }
}
//initial impulse denoise //initial impulse denoise
if (dnparams.luma>0.01) { if (dnparams.luma>0.01) {
@@ -319,6 +326,7 @@ namespace rtengine {
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
// Main detail recovery algorithm: Block loop // Main detail recovery algorithm: Block loop
//OpenMP here //OpenMP here
//adding omp here leads to artifacts
for (int vblk=0; vblk<numblox_H; vblk++) { for (int vblk=0; vblk<numblox_H; vblk++) {
//printf("vblock=%d",vblk); //printf("vblock=%d",vblk);
int vblkmod = vblk%8; int vblkmod = vblk%8;
@@ -328,8 +336,12 @@ namespace rtengine {
float * buffer = new float [width + TS + 2*blkrad*offset]; float * buffer = new float [width + TS + 2*blkrad*offset];
float * datarow = buffer+blkrad*offset; float * datarow = buffer+blkrad*offset;
for (int i=0, row=top; i<TS; i++, row++) { //#ifdef _OPENMP
//#pragma omp parallel for
//#endif
//TODO: implement using AlignedBufferMP
for (int i=0/*, row=top*/; i<TS; i++/*, row++*/) {
int row = top + i;
int rr = row; int rr = row;
if (row<0) { if (row<0) {
rr = MIN(-row,height-1); rr = MIN(-row,height-1);
@@ -349,9 +361,7 @@ namespace rtengine {
}//now we have a padded data row }//now we have a padded data row
//now fill this row of the blocks with Lab high pass data //now fill this row of the blocks with Lab high pass data
#ifdef _OPENMP //OMP here does not add speed, better handled on the outside loop
#pragma omp parallel for
#endif
for (int hblk=0; hblk<numblox_W; hblk++) { for (int hblk=0; hblk<numblox_W; hblk++) {
int left = (hblk-blkrad)*offset; int left = (hblk-blkrad)*offset;
int indx = (hblk)*TS;//index of block in malloc int indx = (hblk)*TS;//index of block in malloc
@@ -410,9 +420,10 @@ namespace rtengine {
fftwf_cleanup(); fftwf_cleanup();
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#ifdef _OPENMP //#ifdef _OPENMP
#pragma omp parallel for //#pragma omp parallel for
#endif //#endif
//TODO: implement using AlignedBufferMP
for (int i=0; i<height; i++) { for (int i=0; i<height; i++) {
for (int j=0; j<width; j++) { for (int j=0; j<width; j++) {
//may want to include masking threshold for large hipass data to preserve edges/detail //may want to include masking threshold for large hipass data to preserve edges/detail
@@ -444,11 +455,15 @@ namespace rtengine {
if (tileright<imwidth) Hmask[width-1-i] = mask; if (tileright<imwidth) Hmask[width-1-i] = mask;
} }
//TODO: OMP candidate? #ifdef _OPENMP
#pragma omp parallel for
#endif
//convert back to RGB and write to destination array //convert back to RGB and write to destination array
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) { for (int i=tiletop/* i1=0*/; i<tilebottom; i++/*, i1++*/){
int i1 = i-tiletop;
float X,Y,Z; float X,Y,Z;
for (int j=tileleft, j1=0; j<tileright; j++, j1++) { for (int j=tileleft/*, j1=0*/; j<tileright; j++/*, j1++*/) {
int j1=j-tileleft;
Y = labdn->L[i1][j1]; Y = labdn->L[i1][j1];
X = (labdn->a[i1][j1]) + Y; X = (labdn->a[i1][j1]) + Y;
@@ -501,9 +516,10 @@ namespace rtengine {
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
#ifdef _OPENMP //#ifdef _OPENMP
#pragma omp parallel for //#pragma omp parallel for
#endif //#endif
//TODO: implement using AlignedBufferMP
for (int n=0; n<TS*TS; n++) { //for DCT for (int n=0; n<TS*TS; n++) { //for DCT
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail)); fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
}//output neighbor averaged result }//output neighbor averaged result
@@ -854,7 +870,7 @@ namespace rtengine {
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
//MK
#ifdef _OPENMP #ifdef _OPENMP
#pragma omp parallel for #pragma omp parallel for
#endif #endif