OpenMP (OMP) optimizations for noise reduction (NR)
This commit is contained in:
@@ -80,7 +80,7 @@ Temperature=5745
|
|||||||
Green=1.0
|
Green=1.0
|
||||||
|
|
||||||
[Impulse Denoising]
|
[Impulse Denoising]
|
||||||
Enabled=true
|
Enabled=false
|
||||||
Threshold=50
|
Threshold=50
|
||||||
|
|
||||||
[Defringing]
|
[Defringing]
|
||||||
|
@@ -2,6 +2,10 @@
|
|||||||
#include "boxblur.h"
|
#include "boxblur.h"
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
//#define MAX(a,b) ((a)<(b)?(b):(a))
|
//#define MAX(a,b) ((a)<(b)?(b):(a))
|
||||||
//#define MIN(a,b) ((a)>(b)?(b):(a))
|
//#define MIN(a,b) ((a)>(b)?(b):(a))
|
||||||
|
|
||||||
@@ -62,9 +66,12 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f
|
|||||||
float * var = new float[w*h];
|
float * var = new float[w*h];
|
||||||
rtengine::boxvar(g, var, 1, 1, w, h);
|
rtengine::boxvar(g, var, 1, 1, w, h);
|
||||||
|
|
||||||
for(y = 0; y != h1; y++){
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
|
for(y = 0; y < h1; y++){
|
||||||
float *rg = &g[w*y];
|
float *rg = &g[w*y];
|
||||||
for(x = 0; x != w1; x++){
|
for(x = 0; x < w1; x++){
|
||||||
//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
|
//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
|
||||||
/*float gx = (fabs((rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w])));
|
/*float gx = (fabs((rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w])));
|
||||||
float gy = (fabs((rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1])));
|
float gy = (fabs((rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1])));
|
||||||
@@ -103,6 +110,8 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f
|
|||||||
memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float));
|
memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float));
|
||||||
memset(a_w, 0, A->DiagonalLength(w)*sizeof(float));
|
memset(a_w, 0, A->DiagonalLength(w)*sizeof(float));
|
||||||
memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
|
memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
|
||||||
|
|
||||||
|
//TODO: OMP here?
|
||||||
for(i = y = 0; y != h; y++){
|
for(i = y = 0; y != h; y++){
|
||||||
for(x = 0; x != w; x++, i++){
|
for(x = 0; x != w; x++, i++){
|
||||||
float ac;
|
float ac;
|
||||||
|
@@ -98,6 +98,9 @@ namespace rtengine {
|
|||||||
const short int imheight=src->height, imwidth=src->width;
|
const short int imheight=src->height, imwidth=src->width;
|
||||||
|
|
||||||
if (dnparams.luma==0 && dnparams.chroma==0) {//nothing to do; copy src to dst
|
if (dnparams.luma==0 && dnparams.chroma==0) {//nothing to do; copy src to dst
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<imheight; i++) {
|
for (int i=0; i<imheight; i++) {
|
||||||
for (int j=0; j<imwidth; j++) {
|
for (int j=0; j<imwidth; j++) {
|
||||||
dst->r[i][j] = src->r[i][j];
|
dst->r[i][j] = src->r[i][j];
|
||||||
@@ -144,7 +147,10 @@ namespace rtengine {
|
|||||||
array2D<float> tilemask_out(TS,TS);
|
array2D<float> tilemask_out(TS,TS);
|
||||||
|
|
||||||
const int border = MAX(2,TS/16);
|
const int border = MAX(2,TS/16);
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<TS; i++) {
|
for (int i=0; i<TS; i++) {
|
||||||
float i1 = abs((i>TS/2 ? i-TS+1 : i));
|
float i1 = abs((i>TS/2 ? i-TS+1 : i));
|
||||||
float vmask = (i1<border ? SQR(sin((M_PI*i1)/(2*border))) : 1.0f);
|
float vmask = (i1<border ? SQR(sin((M_PI*i1)/(2*border))) : 1.0f);
|
||||||
@@ -164,6 +170,9 @@ namespace rtengine {
|
|||||||
|
|
||||||
//output buffer
|
//output buffer
|
||||||
Imagefloat * dsttmp = new Imagefloat(imwidth,imheight);
|
Imagefloat * dsttmp = new Imagefloat(imwidth,imheight);
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int n=0; n<3*imwidth*imheight; n++) {
|
for (int n=0; n<3*imwidth*imheight; n++) {
|
||||||
dsttmp->data[n] = 0;
|
dsttmp->data[n] = 0;
|
||||||
}
|
}
|
||||||
@@ -196,7 +205,6 @@ namespace rtengine {
|
|||||||
//now we have tile dimensions, overlaps
|
//now we have tile dimensions, overlaps
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
|
||||||
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
||||||
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
||||||
|
|
||||||
@@ -214,6 +222,7 @@ namespace rtengine {
|
|||||||
//pixel weight
|
//pixel weight
|
||||||
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
||||||
|
|
||||||
|
// OMP candidate?
|
||||||
//fill tile from image; convert RGB to "luma/chroma"
|
//fill tile from image; convert RGB to "luma/chroma"
|
||||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++)
|
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++)
|
||||||
for (int j=tileleft, j1=0; j<tileright; j++, j1++) {
|
for (int j=tileleft, j1=0; j<tileright; j++, j1++) {
|
||||||
@@ -340,6 +349,9 @@ namespace rtengine {
|
|||||||
}//now we have a padded data row
|
}//now we have a padded data row
|
||||||
|
|
||||||
//now fill this row of the blocks with Lab high pass data
|
//now fill this row of the blocks with Lab high pass data
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||||
int left = (hblk-blkrad)*offset;
|
int left = (hblk-blkrad)*offset;
|
||||||
int indx = (hblk)*TS;//index of block in malloc
|
int indx = (hblk)*TS;//index of block in malloc
|
||||||
@@ -361,6 +373,9 @@ namespace rtengine {
|
|||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// now process the vblk row of blocks for noise reduction
|
// now process the vblk row of blocks for noise reduction
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||||
|
|
||||||
RGBtile_denoise (fLblox, vblk, hblk, numblox_H, numblox_W, noisevar_Ldetail );
|
RGBtile_denoise (fLblox, vblk, hblk, numblox_H, numblox_W, noisevar_Ldetail );
|
||||||
@@ -395,7 +410,9 @@ namespace rtengine {
|
|||||||
fftwf_cleanup();
|
fftwf_cleanup();
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<height; i++) {
|
for (int i=0; i<height; i++) {
|
||||||
for (int j=0; j<width; j++) {
|
for (int j=0; j<width; j++) {
|
||||||
//may want to include masking threshold for large hipass data to preserve edges/detail
|
//may want to include masking threshold for large hipass data to preserve edges/detail
|
||||||
@@ -427,6 +444,7 @@ namespace rtengine {
|
|||||||
if (tileright<imwidth) Hmask[width-1-i] = mask;
|
if (tileright<imwidth) Hmask[width-1-i] = mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO: OMP candidate?
|
||||||
//convert back to RGB and write to destination array
|
//convert back to RGB and write to destination array
|
||||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) {
|
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) {
|
||||||
float X,Y,Z;
|
float X,Y,Z;
|
||||||
@@ -460,7 +478,7 @@ namespace rtengine {
|
|||||||
}//end of tile row
|
}//end of tile row
|
||||||
}//end of tile loop
|
}//end of tile loop
|
||||||
|
|
||||||
|
//TODO: is memcpy multithreaded? If not, should this be replaced with an OpenMP-parallelized for loop?
|
||||||
//copy denoised image to output
|
//copy denoised image to output
|
||||||
memcpy (dst->data, dsttmp->data, 3*imwidth*imheight*sizeof(float));
|
memcpy (dst->data, dsttmp->data, 3*imwidth*imheight*sizeof(float));
|
||||||
|
|
||||||
@@ -482,7 +500,10 @@ namespace rtengine {
|
|||||||
int blkstart = hblproc*TS*TS;
|
int blkstart = hblproc*TS*TS;
|
||||||
|
|
||||||
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int n=0; n<TS*TS; n++) { //for DCT
|
for (int n=0; n<TS*TS; n++) { //for DCT
|
||||||
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
||||||
}//output neighbor averaged result
|
}//output neighbor averaged result
|
||||||
@@ -502,7 +523,10 @@ namespace rtengine {
|
|||||||
{
|
{
|
||||||
const int numblox_W = ceil(((float)(width))/(offset));
|
const int numblox_W = ceil(((float)(width))/(offset));
|
||||||
const float DCTnorm = 1.0f/(4*TS*TS); //for DCT
|
const float DCTnorm = 1.0f/(4*TS*TS); //for DCT
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
//add row of tiles to output image
|
//add row of tiles to output image
|
||||||
for (int hblk=0; hblk < numblox_W; hblk++) {
|
for (int hblk=0; hblk < numblox_W; hblk++) {
|
||||||
int left = (hblk-blkrad)*offset;
|
int left = (hblk-blkrad)*offset;
|
||||||
@@ -544,6 +568,9 @@ namespace rtengine {
|
|||||||
for (int i=0; i<65536; i++) histo[i]=0;
|
for (int i=0; i<65536; i++) histo[i]=0;
|
||||||
|
|
||||||
//calculate histogram of absolute values of HH wavelet coeffs
|
//calculate histogram of absolute values of HH wavelet coeffs
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<datalen; i++) {
|
for (int i=0; i<datalen; i++) {
|
||||||
histo[MAX(0,MIN(65535,abs((int)DataList[i])))]++;
|
histo[MAX(0,MIN(65535,abs((int)DataList[i])))]++;
|
||||||
}
|
}
|
||||||
@@ -743,7 +770,10 @@ namespace rtengine {
|
|||||||
wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab )
|
wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab )
|
||||||
{
|
{
|
||||||
int maxlvl = WaveletCoeffs_L.maxlevel();
|
int maxlvl = WaveletCoeffs_L.maxlevel();
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||||
|
|
||||||
int Wlvl_L = WaveletCoeffs_L.level_W(lvl);
|
int Wlvl_L = WaveletCoeffs_L.level_W(lvl);
|
||||||
@@ -780,7 +810,9 @@ namespace rtengine {
|
|||||||
int max;
|
int max;
|
||||||
|
|
||||||
printf("\n level=%d \n",level);
|
printf("\n level=%d \n",level);
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int dir=1; dir<4; dir++) {
|
for (int dir=1; dir<4; dir++) {
|
||||||
float madL = SQR(MadMax(WavCoeffs_L[dir], max, W_L*H_L));
|
float madL = SQR(MadMax(WavCoeffs_L[dir], max, W_L*H_L));
|
||||||
float mada = SQR(MadMax(WavCoeffs_a[dir], max, W_ab*H_ab));
|
float mada = SQR(MadMax(WavCoeffs_a[dir], max, W_ab*H_ab));
|
||||||
@@ -794,7 +826,11 @@ namespace rtengine {
|
|||||||
float mad_b = madb*noisevar_ab;
|
float mad_b = madb*noisevar_ab;
|
||||||
|
|
||||||
if (noisevar_ab>0.01) {
|
if (noisevar_ab>0.01) {
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<H_ab; i++) {
|
for (int i=0; i<H_ab; i++) {
|
||||||
for (int j=0; j<W_ab; j++) {
|
for (int j=0; j<W_ab; j++) {
|
||||||
|
|
||||||
@@ -818,6 +854,10 @@ namespace rtengine {
|
|||||||
|
|
||||||
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||||
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||||
|
//MK
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<H_ab; i++)
|
for (int i=0; i<H_ab; i++)
|
||||||
for (int j=0; j<W_ab; j++) {
|
for (int j=0; j<W_ab; j++) {
|
||||||
|
|
||||||
@@ -840,6 +880,9 @@ namespace rtengine {
|
|||||||
|
|
||||||
if (noisevar_L>0.01) {
|
if (noisevar_L>0.01) {
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<W_L*H_L; i++) {
|
for (int i=0; i<W_L*H_L; i++) {
|
||||||
|
|
||||||
float mag = SQR(WavCoeffs_L[dir][i]);
|
float mag = SQR(WavCoeffs_L[dir][i]);
|
||||||
@@ -850,6 +893,9 @@ namespace rtengine {
|
|||||||
}
|
}
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
boxblur(sfave, sfave, level+2, level+2, W_L, H_L);//increase smoothness by locally averaging shrinkage
|
boxblur(sfave, sfave, level+2, level+2, W_L, H_L);//increase smoothness by locally averaging shrinkage
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int i=0; i<W_L*H_L; i++) {
|
for (int i=0; i<W_L*H_L; i++) {
|
||||||
|
|
||||||
|
|
||||||
|
@@ -48,6 +48,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
|||||||
float* temp = buffer->data;
|
float* temp = buffer->data;
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
temp[row*H+col] = (float)src[row][col];
|
temp[row*H+col] = (float)src[row][col];
|
||||||
@@ -55,6 +58,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = (float)src[row][0]/len;
|
temp[row*W+0] = (float)src[row][0]/len;
|
||||||
@@ -76,6 +82,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row][col] = temp[row*W+col];
|
dst[row][col] = temp[row*W+col];
|
||||||
@@ -83,6 +92,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0][col] = temp[0*W+col]/len;
|
dst[0][col] = temp[0*W+col]/len;
|
||||||
@@ -127,6 +139,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = (float)src[row*W+0]/len;
|
temp[row*W+0] = (float)src[row*W+0]/len;
|
||||||
@@ -148,6 +163,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row*W+col] = temp[row*W+col];
|
dst[row*W+col] = temp[row*W+col];
|
||||||
@@ -155,6 +173,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0*W+col] = temp[0*W+col]/len;
|
dst[0*W+col] = temp[0*W+col]/len;
|
||||||
@@ -282,6 +303,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
float* tempave = buffer2->data;
|
float* tempave = buffer2->data;
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
temp[row*H+col] = src[row*W+col];
|
temp[row*H+col] = src[row*W+col];
|
||||||
@@ -289,6 +313,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = (float)src[row*W+0]/len;
|
temp[row*W+0] = (float)src[row*W+0]/len;
|
||||||
@@ -311,12 +338,18 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
tempave[row*W+col] = temp[row*W+col];
|
tempave[row*W+col] = temp[row*W+col];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
tempave[0*W+col] = temp[0*W+col]/len;
|
tempave[0*W+col] = temp[0*W+col]/len;
|
||||||
@@ -342,13 +375,19 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
|
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
temp[row*H+col] = fabs(src[row*W+col]-tempave[row*W+col]);
|
temp[row*H+col] = fabs(src[row*W+col]-tempave[row*W+col]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = fabs(src[row*W+0]-tempave[row*W+0])/len;
|
temp[row*W+0] = fabs(src[row*W+0]-tempave[row*W+0])/len;
|
||||||
@@ -371,6 +410,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row*W+col] = temp[row*W+col];
|
dst[row*W+col] = temp[row*W+col];
|
||||||
@@ -378,6 +420,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0*W+col] = temp[0*W+col]/len;
|
dst[0*W+col] = temp[0*W+col]/len;
|
||||||
@@ -416,6 +461,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
|||||||
float* temp = buffer->data;
|
float* temp = buffer->data;
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
temp[row*H+col] = SQR(src[row*W+col]);
|
temp[row*H+col] = SQR(src[row*W+col]);
|
||||||
@@ -423,6 +471,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = SQR((float)src[row*W+0])/len;
|
temp[row*W+0] = SQR((float)src[row*W+0])/len;
|
||||||
@@ -444,6 +495,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row*W+col] = temp[row*W+col];
|
dst[row*W+col] = temp[row*W+col];
|
||||||
@@ -451,6 +505,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0*W+col] = temp[0*W+col]/len;
|
dst[0*W+col] = temp[0*W+col]/len;
|
||||||
@@ -488,6 +545,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
|||||||
float* temp = buffer->data;
|
float* temp = buffer->data;
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++) {
|
for (int row=0; row<H; row++) {
|
||||||
int rr = min(H-1,max(0,row+dy));
|
int rr = min(H-1,max(0,row+dy));
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
@@ -498,6 +558,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
int rr = min(H-1,max(0,row+dy));
|
int rr = min(H-1,max(0,row+dy));
|
||||||
@@ -527,6 +590,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row*W+col] = temp[row*W+col];
|
dst[row*W+col] = temp[row*W+col];
|
||||||
@@ -534,6 +600,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0*W+col] = temp[0*W+col]/len;
|
dst[0*W+col] = temp[0*W+col]/len;
|
||||||
@@ -572,6 +641,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
|||||||
float* temp = buffer->data;
|
float* temp = buffer->data;
|
||||||
|
|
||||||
if (radx==0) {
|
if (radx==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
temp[row*H+col] = fabs(src[row*W+col]);
|
temp[row*H+col] = fabs(src[row*W+col]);
|
||||||
@@ -579,6 +651,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
|||||||
} else {
|
} else {
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row = 0; row < H; row++) {
|
for (int row = 0; row < H; row++) {
|
||||||
int len = radx + 1;
|
int len = radx + 1;
|
||||||
temp[row*W+0] = fabs((float)src[row*W+0])/len;
|
temp[row*W+0] = fabs((float)src[row*W+0])/len;
|
||||||
@@ -600,6 +675,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (rady==0) {
|
if (rady==0) {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int row=0; row<H; row++)
|
for (int row=0; row<H; row++)
|
||||||
for (int col=0; col<H; col++) {
|
for (int col=0; col<H; col++) {
|
||||||
dst[row*W+col] = temp[row*W+col];
|
dst[row*W+col] = temp[row*W+col];
|
||||||
@@ -607,6 +685,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
|||||||
} else {
|
} else {
|
||||||
//vertical blur
|
//vertical blur
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
for (int col = 0; col < W; col++) {
|
for (int col = 0; col < W; col++) {
|
||||||
int len = rady + 1;
|
int len = rady + 1;
|
||||||
dst[0*W+col] = temp[0*W+col]/len;
|
dst[0*W+col] = temp[0*W+col]/len;
|
||||||
|
Reference in New Issue
Block a user