Fixed several OpenMP bugs in denoise

See issue 1474.
This commit is contained in:
Oliver Duis
2012-07-14 17:20:24 +02:00
parent e56b12022d
commit 64c51e5a4b
2 changed files with 57 additions and 237 deletions

View File

@@ -40,29 +40,23 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady)
AlignedBufferMP<float> buffer(W*H);
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W*H);
float* temp = buffer->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = (float)src[row][col];
}
buffer.release(pBuf);
}
} else {
//horizontal blur
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = radx + 1;
temp[row*W+0] = (float)src[row][0]/len;
for (int j=1; j<=radx; j++) {
@@ -79,7 +73,6 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
temp[row*W+col] = (temp[row*W+col-1]*len - src[row][col-radx-1])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
@@ -87,24 +80,16 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row][col] = temp[row*W+col];
}
buffer.release(pBuf);
}
} else {
//vertical blur
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = rady + 1;
dst[0][col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -121,9 +106,11 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
dst[row][col] = (dst[(row-1)][col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
delete buffer;
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -135,29 +122,20 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
AlignedBufferMP<float> buffer(W*H);
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W*H);
float* temp = buffer->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++){
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = src[row*W+col];
}
buffer.release(pBuf);
}
} else {
//horizontal blur
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = radx + 1;
temp[row*W+0] = (float)src[row*W+0]/len;
for (int j=1; j<=radx; j++) {
@@ -174,7 +152,6 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
temp[row*W+col] = (temp[row*W+col-1]*len - src[row*W+col-radx-1])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
@@ -182,24 +159,16 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row*W+col] = temp[row*W+col];
}
buffer.release(pBuf);
}
} else {
//vertical blur
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = rady + 1;
dst[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -216,10 +185,11 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
delete buffer;
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -228,10 +198,12 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
template<typename T> void boxvar (T* src, T* dst, int radx, int rady, int W, int H) {
AlignedBufferMP<float> buffer1(W*H);
AlignedBufferMP<float> buffer2(W*H);
AlignedBufferMP<float> buffer3(W*H);
AlignedBuffer<float> buffer1(W*H);
AlignedBuffer<float> buffer2(W*H);
float* tempave = buffer1.data;
float* tempsqave = buffer2.data;
AlignedBufferMP<float> buffer3(H);
//float image_ave = 0;
@@ -241,15 +213,6 @@ template<typename T> void boxvar (T* src, T* dst, int radx, int rady, int W, int
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* tempave=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempsqave=(T*)pBuf2->data;
AlignedBuffer<float>* pBuf3 = buffer3.acquire();
T* tempave2=(T*)pBuf3->data;
int len = radx + 1;
tempave[row*W+0] = src[row*W+0]/len;
tempsqave[row*W+0] = SQR(src[row*W+0])/len;
@@ -271,9 +234,6 @@ template<typename T> void boxvar (T* src, T* dst, int radx, int rady, int W, int
tempsqave[row*W+col] = (tempsqave[row*W+col-1]*len - SQR(src[row*W+col-radx-1]))/(len-1);
len --;
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
buffer3.release(pBuf3);
}
//vertical blur
@@ -281,12 +241,6 @@ template<typename T> void boxvar (T* src, T* dst, int radx, int rady, int W, int
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* tempave=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempsqave=(T*)pBuf2->data;
AlignedBuffer<float>* pBuf3 = buffer3.acquire();
T* tempave2=(T*)pBuf3->data;
@@ -316,8 +270,6 @@ template<typename T> void boxvar (T* src, T* dst, int radx, int rady, int W, int
dst[row*W+col] = fabs(dst[row*W+col] - SQR(tempave2[row]));
//image_ave += src[row*W+col];
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
buffer3.release(pBuf3);
}
@@ -333,35 +285,26 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
AlignedBufferMP<float> buffer1(W*H);
AlignedBufferMP<float> buffer2(W*H);
AlignedBuffer<float>* buffer1 = new AlignedBuffer<float> (W*H);
float* temp = buffer1->data;
AlignedBuffer<float>* buffer2 = new AlignedBuffer<float> (W*H);
float* tempave = buffer2->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = src[row*W+col];
}
buffer1.release(pBuf1);
}
} else {
//horizontal blur
//OpenMP here
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempave=(T*)pBuf2->data;
int len = radx + 1;
temp[row*W+0] = (float)src[row*W+0]/len;
for (int j=1; j<=radx; j++) {
@@ -378,8 +321,6 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
temp[row*W+col] = (temp[row*W+col-1]*len - src[row*W+col-radx-1])/(len-1);
len --;
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
}
}
@@ -388,31 +329,16 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
#pragma omp parallel for
#endif
for (int row=0; row<H; row++){
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempave=(T*)pBuf2->data;
for (int col=0; col<H; col++) {
tempave[row*W+col] = temp[row*W+col];
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
}
} else {
//vertical blur
//OpenMP here
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempave=(T*)pBuf2->data;
int len = rady + 1;
tempave[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -429,8 +355,6 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
tempave[row*W+col] = (tempave[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
}
}
@@ -442,19 +366,10 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempave=(T*)pBuf2->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = fabs(src[row*W+col]-tempave[row*W+col]);
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
}
} else {
//horizontal blur
//OpenMP here
@@ -462,12 +377,6 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
AlignedBuffer<float>* pBuf2 = buffer2.acquire();
T* tempave=(T*)pBuf2->data;
int len = radx + 1;
temp[row*W+0] = fabs(src[row*W+0]-tempave[row*W+0])/len;
for (int j=1; j<=radx; j++) {
@@ -485,8 +394,6 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
temp[row*W+col] = (temp[row*W+col-1]*len - fabs(src[row*W+col-radx-1]-tempave[row*W+col-radx-1]))/(len-1);
len --;
}
buffer1.release(pBuf1);
buffer2.release(pBuf2);
}
}
@@ -494,15 +401,10 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row*W+col] = temp[row*W+col];
}
buffer1.release(pBuf1);
}
} else {
//vertical blur
//OpenMP here
@@ -510,9 +412,6 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf1 = buffer1.acquire();
T* temp=(T*)pBuf1->data;
int len = rady + 1;
dst[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -529,10 +428,12 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer1.release(pBuf1);
}
}
delete buffer1;
delete buffer2;
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -544,21 +445,17 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
AlignedBufferMP<float> buffer(W*H);
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W*H);
float* temp = buffer->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = SQR(src[row*W+col]);
}
buffer.release(pBuf);
}
} else {
//horizontal blur
//OpenMP here
@@ -566,9 +463,6 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = radx + 1;
temp[row*W+0] = SQR((float)src[row*W+0])/len;
for (int j=1; j<=radx; j++) {
@@ -585,8 +479,6 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
temp[row*W+col] = (temp[row*W+col-1]*len - SQR(src[row*W+col-radx-1]))/(len-1);
len --;
}
buffer.release(pBuf);
}
}
@@ -594,15 +486,10 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row*W+col] = temp[row*W+col];
}
buffer.release(pBuf);
}
} else {
//vertical blur
//OpenMP here
@@ -610,9 +497,6 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = rady + 1;
dst[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -629,10 +513,11 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
delete buffer;
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -644,33 +529,20 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
AlignedBufferMP<float> buffer(W*H);
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W*H);
float* temp = buffer->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int rr = min(H-1,max(0,row+dy));
for (int col=0; col<H; col++) {
int cc = min(W-1,max(0,col+dx));
temp[row*H+col] = dy>0 ? (src[row*W+col])*(src[rr*W+cc]) : 0;
}
buffer.release(pBuf);
}
} else {
//horizontal blur
//OpenMP here
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = radx + 1;
int rr = min(H-1,max(0,row+dy));
int cc = min(W-1,max(0,0+dx));
@@ -695,7 +567,6 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
temp[row*W+col] = (temp[row*W+col-1]*len - (src[row*W+col-radx-1])*(src[rr*W+cc1]))/(len-1);
len --;
}
buffer.release(pBuf);
}
}
@@ -703,15 +574,10 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row*W+col] = temp[row*W+col];
}
buffer.release(pBuf);
}
} else {
//vertical blur
//OpenMP here
@@ -719,9 +585,6 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = rady + 1;
dst[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -738,9 +601,11 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
delete buffer;
}
@@ -753,21 +618,17 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
AlignedBufferMP<float> buffer(W*H);
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W*H);
float* temp = buffer->data;
if (radx==0) {
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
temp[row*H+col] = fabs(src[row*W+col]);
}
buffer.release(pBuf);
}
} else {
//horizontal blur
//OpenMP here
@@ -775,9 +636,6 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
#pragma omp parallel for
#endif
for (int row = 0; row < H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = radx + 1;
temp[row*W+0] = fabs((float)src[row*W+0])/len;
for (int j=1; j<=radx; j++) {
@@ -794,7 +652,6 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
temp[row*W+col] = (temp[row*W+col-1]*len - fabs(src[row*W+col-radx-1]))/(len-1);
len --;
}
buffer.release(pBuf);
}
}
@@ -802,15 +659,10 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
#ifdef _OPENMP
#pragma omp parallel for
#endif
for (int row=0; row<H; row++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int row=0; row<H; row++)
for (int col=0; col<H; col++) {
dst[row*W+col] = temp[row*W+col];
}
buffer.release(pBuf);
}
} else {
//vertical blur
//OpenMP here
@@ -818,9 +670,6 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
#pragma omp parallel for
#endif
for (int col = 0; col < W; col++) {
AlignedBuffer<float>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
int len = rady + 1;
dst[0*W+col] = temp[0*W+col]/len;
for (int i=1; i<=rady; i++) {
@@ -837,12 +686,13 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
dst[row*W+col] = (dst[(row-1)*W+col]*len - temp[(row-rady-1)*W+col])/(len-1);
len --;
}
buffer.release(pBuf);
}
}
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
delete buffer;
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
}