From 64c51e5a4bca1bd20169d09d2dbd03cfb68618cf Mon Sep 17 00:00:00 2001 From: Oliver Duis Date: Sat, 14 Jul 2012 17:20:24 +0200 Subject: [PATCH] Fixed several OMP bugs in denoise see issue 1474 --- rtengine/FTblockDN.cc | 42 +------ rtengine/boxblur.h | 252 +++++++++--------------------------------- 2 files changed, 57 insertions(+), 237 deletions(-) diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index 5bb08d3e7..c83203e21 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -97,17 +97,9 @@ namespace rtengine { const short int imheight=src->height, imwidth=src->width; - if (dnparams.luma==0 && dnparams.chroma==0) {//nothing to do; copy src to dst -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i=0; ir[i][j] = src->r[i][j]; - dst->r[i][j] = src->r[i][j]; - dst->r[i][j] = src->r[i][j]; - } - } + if (dnparams.luma==0 && dnparams.chroma==0) { + //nothing to do; copy src to dst + memcpy(dst->data,src->data,dst->width*dst->height*3*sizeof(float)); return; } @@ -170,12 +162,7 @@ namespace rtengine { //output buffer Imagefloat * dsttmp = new Imagefloat(imwidth,imheight); -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int n=0; n<3*imwidth*imheight; n++) { - dsttmp->data[n] = 0; - } + for (int n=0; n<3*imwidth*imheight; n++) dsttmp->data[n] = 0; const int tilesize = 1024; const int overlap = 128; @@ -383,9 +370,6 @@ namespace rtengine { //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% // now process the vblk row of blocks for noise reduction -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int hblk=0; hblkdata, dsttmp->data, 3*imwidth*imheight*sizeof(float)); + memcpy (dst->data, dsttmp->data, 3*dst->width*dst->height*sizeof(float)); delete dsttmp; @@ -516,10 +495,6 @@ namespace rtengine { boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT -//#ifdef _OPENMP -//#pragma omp parallel for -//#endif -//TODO: implement using AlignedBufferMP for (int n=0; n void boxblur (T** src, A** dst, int radx, int rady, i //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) - AlignedBufferMP buffer(W*H); + AlignedBuffer* buffer = new AlignedBuffer (W*H); + float* temp = buffer->data; if (radx==0) { #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; + for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = radx + 1; temp[row*W+0] = (float)src[row][0]/len; for (int j=1; j<=radx; j++) { @@ -79,7 +73,6 @@ template void boxblur (T** src, A** dst, int radx, int rady, i temp[row*W+col] = (temp[row*W+col-1]*len - src[row][col-radx-1])/(len-1); len --; } - buffer.release(pBuf); } } @@ -87,24 +80,16 @@ template void boxblur (T** src, A** dst, int radx, int rady, i #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = rady + 1; dst[0][col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -121,9 +106,11 @@ template void boxblur (T** src, A** dst, int radx, int rady, i dst[row][col] = (dst[(row-1)][col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - buffer.release(pBuf); } } + + delete buffer; + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -135,29 +122,20 @@ template void boxblur (T* src, A* dst, int radx, int rady, int //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBufferMP buffer(W*H); + AlignedBuffer* buffer = new AlignedBuffer (W*H); + float* temp = buffer->data; if (radx==0) { -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; + for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = radx + 1; temp[row*W+0] = (float)src[row*W+0]/len; for (int j=1; j<=radx; j++) { @@ -174,7 +152,6 @@ template void boxblur (T* src, A* dst, int radx, int rady, int temp[row*W+col] = (temp[row*W+col-1]*len - src[row*W+col-radx-1])/(len-1); len --; } - buffer.release(pBuf); } } @@ -182,24 +159,16 @@ template void boxblur (T* src, A* dst, int radx, int rady, int #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -216,10 +185,11 @@ template void boxblur (T* src, A* dst, int radx, int rady, int dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - buffer.release(pBuf); } } + delete buffer; + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -228,10 +198,12 @@ template void boxblur (T* src, A* dst, int radx, int rady, int template void boxvar (T* src, T* dst, int radx, int rady, int W, int H) { - AlignedBufferMP buffer1(W*H); - AlignedBufferMP buffer2(W*H); - AlignedBufferMP buffer3(W*H); + AlignedBuffer buffer1(W*H); + AlignedBuffer buffer2(W*H); + float* tempave = buffer1.data; + float* tempsqave = buffer2.data; + AlignedBufferMP buffer3(H); //float image_ave = 0; @@ -241,15 +213,6 @@ template void boxvar (T* src, T* dst, int radx, int rady, int W, int #pragma omp parallel for #endif for (int row = 0; row < H; row++) { - AlignedBuffer* pBuf1 = buffer1.acquire(); - T* tempave=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempsqave=(T*)pBuf2->data; - - AlignedBuffer* pBuf3 = buffer3.acquire(); - T* tempave2=(T*)pBuf3->data; - int len = radx + 1; tempave[row*W+0] = src[row*W+0]/len; tempsqave[row*W+0] = SQR(src[row*W+0])/len; @@ -271,9 +234,6 @@ template void boxvar (T* src, T* dst, int radx, int rady, int W, int tempsqave[row*W+col] = (tempsqave[row*W+col-1]*len - SQR(src[row*W+col-radx-1]))/(len-1); len --; } - buffer1.release(pBuf1); - buffer2.release(pBuf2); - buffer3.release(pBuf3); } //vertical blur @@ -281,12 +241,6 @@ template void boxvar (T* src, T* dst, int radx, int rady, int W, int #pragma omp parallel for #endif for (int col = 0; col < W; col++) { - AlignedBuffer* pBuf1 = buffer1.acquire(); - T* tempave=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempsqave=(T*)pBuf2->data; - AlignedBuffer* pBuf3 = buffer3.acquire(); T* tempave2=(T*)pBuf3->data; @@ -316,8 +270,6 @@ template void boxvar (T* src, T* dst, int radx, int rady, int W, int dst[row*W+col] = fabs(dst[row*W+col] - SQR(tempave2[row])); //image_ave += src[row*W+col]; } - buffer1.release(pBuf1); - buffer2.release(pBuf2); buffer3.release(pBuf3); } @@ -333,35 +285,26 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBufferMP buffer1(W*H); - AlignedBufferMP buffer2(W*H); + AlignedBuffer* buffer1 = new AlignedBuffer (W*H); + float* temp = buffer1->data; + + AlignedBuffer* buffer2 = new AlignedBuffer (W*H); + float* tempave = buffer2->data; if (radx==0) { #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - + for (int row=0; row* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempave=(T*)pBuf2->data; - int len = radx + 1; temp[row*W+0] = (float)src[row*W+0]/len; for (int j=1; j<=radx; j++) { @@ -378,8 +321,6 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int temp[row*W+col] = (temp[row*W+col-1]*len - src[row*W+col-radx-1])/(len-1); len --; } - buffer1.release(pBuf1); - buffer2.release(pBuf2); } } @@ -388,31 +329,16 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int #pragma omp parallel for #endif for (int row=0; row* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempave=(T*)pBuf2->data; - for (int col=0; col* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempave=(T*)pBuf2->data; - int len = rady + 1; tempave[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -429,8 +355,6 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int tempave[row*W+col] = (tempave[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - buffer1.release(pBuf1); - buffer2.release(pBuf2); } } @@ -442,19 +366,10 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempave=(T*)pBuf2->data; - + for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int #pragma omp parallel for #endif for (int row = 0; row < H; row++) { - AlignedBuffer* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - - AlignedBuffer* pBuf2 = buffer2.acquire(); - T* tempave=(T*)pBuf2->data; - int len = radx + 1; temp[row*W+0] = fabs(src[row*W+0]-tempave[row*W+0])/len; for (int j=1; j<=radx; j++) { @@ -485,8 +394,6 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int temp[row*W+col] = (temp[row*W+col-1]*len - fabs(src[row*W+col-radx-1]-tempave[row*W+col-radx-1]))/(len-1); len --; } - buffer1.release(pBuf1); - buffer2.release(pBuf2); } } @@ -494,15 +401,10 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - + for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int #pragma omp parallel for #endif for (int col = 0; col < W; col++) { - AlignedBuffer* pBuf1 = buffer1.acquire(); - T* temp=(T*)pBuf1->data; - int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -529,10 +428,12 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - - buffer1.release(pBuf1); } } + + delete buffer1; + delete buffer2; + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -544,21 +445,17 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBufferMP buffer(W*H); + AlignedBuffer* buffer = new AlignedBuffer (W*H); + float* temp = buffer->data; if (radx==0) { #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row void boxsqblur (T* src, A* dst, int radx, int rady, i #pragma omp parallel for #endif for (int row = 0; row < H; row++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = radx + 1; temp[row*W+0] = SQR((float)src[row*W+0])/len; for (int j=1; j<=radx; j++) { @@ -585,8 +479,6 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i temp[row*W+col] = (temp[row*W+col-1]*len - SQR(src[row*W+col-radx-1]))/(len-1); len --; } - - buffer.release(pBuf); } } @@ -594,15 +486,10 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row void boxsqblur (T* src, A* dst, int radx, int rady, i #pragma omp parallel for #endif for (int col = 0; col < W; col++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -629,10 +513,11 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - - buffer.release(pBuf); } } + + delete buffer; + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -644,33 +529,20 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBufferMP buffer(W*H); + AlignedBuffer* buffer = new AlignedBuffer (W*H); + float* temp = buffer->data; if (radx==0) { -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int rr = min(H-1,max(0,row+dy)); for (int col=0; col0 ? (src[row*W+col])*(src[rr*W+cc]) : 0; } - buffer.release(pBuf); } } else { //horizontal blur -//OpenMP here -#ifdef _OPENMP -#pragma omp parallel for -#endif for (int row = 0; row < H; row++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = radx + 1; int rr = min(H-1,max(0,row+dy)); int cc = min(W-1,max(0,0+dx)); @@ -695,7 +567,6 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in temp[row*W+col] = (temp[row*W+col-1]*len - (src[row*W+col-radx-1])*(src[rr*W+cc1]))/(len-1); len --; } - buffer.release(pBuf); } } @@ -703,15 +574,10 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row void boxcorrelate (T* src, A* dst, int dx, int dy, in #pragma omp parallel for #endif for (int col = 0; col < W; col++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -738,9 +601,11 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - buffer.release(pBuf); } } + + delete buffer; + } @@ -753,21 +618,17 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% //box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1) - AlignedBufferMP buffer(W*H); + AlignedBuffer* buffer = new AlignedBuffer (W*H); + float* temp = buffer->data; if (radx==0) { #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, #pragma omp parallel for #endif for (int row = 0; row < H; row++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = radx + 1; temp[row*W+0] = fabs((float)src[row*W+0])/len; for (int j=1; j<=radx; j++) { @@ -794,7 +652,6 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, temp[row*W+col] = (temp[row*W+col-1]*len - fabs(src[row*W+col-radx-1]))/(len-1); len --; } - buffer.release(pBuf); } } @@ -802,15 +659,10 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, #ifdef _OPENMP #pragma omp parallel for #endif - for (int row=0; row* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - + for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, #pragma omp parallel for #endif for (int col = 0; col < W; col++) { - AlignedBuffer* pBuf = buffer.acquire(); - T* temp=(T*)pBuf->data; - int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; for (int i=1; i<=rady; i++) { @@ -837,12 +686,13 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1); len --; } - buffer.release(pBuf); - } } } -//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + delete buffer; + +} + //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% }