OpenMP (OMP) optimizations for noise reduction (NR)
This commit is contained in:
@@ -80,7 +80,7 @@ Temperature=5745
|
||||
Green=1.0
|
||||
|
||||
[Impulse Denoising]
|
||||
Enabled=true
|
||||
Enabled=false
|
||||
Threshold=50
|
||||
|
||||
[Defringing]
|
||||
|
@@ -2,6 +2,10 @@
|
||||
#include "boxblur.h"
|
||||
#include <cstdlib>
|
||||
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
//#define MAX(a,b) ((a)<(b)?(b):(a))
|
||||
//#define MIN(a,b) ((a)>(b)?(b):(a))
|
||||
|
||||
@@ -62,9 +66,12 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f
|
||||
float * var = new float[w*h];
|
||||
rtengine::boxvar(g, var, 1, 1, w, h);
|
||||
|
||||
for(y = 0; y != h1; y++){
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(y = 0; y < h1; y++){
|
||||
float *rg = &g[w*y];
|
||||
for(x = 0; x != w1; x++){
|
||||
for(x = 0; x < w1; x++){
|
||||
//Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
|
||||
/*float gx = (fabs((rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w])));
|
||||
float gy = (fabs((rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1])));
|
||||
@@ -103,6 +110,8 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f
|
||||
memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float));
|
||||
memset(a_w, 0, A->DiagonalLength(w)*sizeof(float));
|
||||
memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
|
||||
|
||||
//TODO: OMP here?
|
||||
for(i = y = 0; y != h; y++){
|
||||
for(x = 0; x != w; x++, i++){
|
||||
float ac;
|
||||
|
@@ -98,6 +98,9 @@ namespace rtengine {
|
||||
const short int imheight=src->height, imwidth=src->width;
|
||||
|
||||
if (dnparams.luma==0 && dnparams.chroma==0) {//nothing to do; copy src to dst
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<imheight; i++) {
|
||||
for (int j=0; j<imwidth; j++) {
|
||||
dst->r[i][j] = src->r[i][j];
|
||||
@@ -145,6 +148,9 @@ namespace rtengine {
|
||||
|
||||
const int border = MAX(2,TS/16);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<TS; i++) {
|
||||
float i1 = abs((i>TS/2 ? i-TS+1 : i));
|
||||
float vmask = (i1<border ? SQR(sin((M_PI*i1)/(2*border))) : 1.0f);
|
||||
@@ -164,6 +170,9 @@ namespace rtengine {
|
||||
|
||||
//output buffer
|
||||
Imagefloat * dsttmp = new Imagefloat(imwidth,imheight);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int n=0; n<3*imwidth*imheight; n++) {
|
||||
dsttmp->data[n] = 0;
|
||||
}
|
||||
@@ -196,7 +205,6 @@ namespace rtengine {
|
||||
//now we have tile dimensions, overlaps
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
||||
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
||||
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
||||
|
||||
@@ -214,6 +222,7 @@ namespace rtengine {
|
||||
//pixel weight
|
||||
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
||||
|
||||
// OMP candidate?
|
||||
//fill tile from image; convert RGB to "luma/chroma"
|
||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++)
|
||||
for (int j=tileleft, j1=0; j<tileright; j++, j1++) {
|
||||
@@ -340,6 +349,9 @@ namespace rtengine {
|
||||
}//now we have a padded data row
|
||||
|
||||
//now fill this row of the blocks with Lab high pass data
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||
int left = (hblk-blkrad)*offset;
|
||||
int indx = (hblk)*TS;//index of block in malloc
|
||||
@@ -361,6 +373,9 @@ namespace rtengine {
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
// now process the vblk row of blocks for noise reduction
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||
|
||||
RGBtile_denoise (fLblox, vblk, hblk, numblox_H, numblox_W, noisevar_Ldetail );
|
||||
@@ -395,7 +410,9 @@ namespace rtengine {
|
||||
fftwf_cleanup();
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<height; i++) {
|
||||
for (int j=0; j<width; j++) {
|
||||
//may want to include masking threshold for large hipass data to preserve edges/detail
|
||||
@@ -427,6 +444,7 @@ namespace rtengine {
|
||||
if (tileright<imwidth) Hmask[width-1-i] = mask;
|
||||
}
|
||||
|
||||
//TODO: OMP candidate?
|
||||
//convert back to RGB and write to destination array
|
||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) {
|
||||
float X,Y,Z;
|
||||
@@ -460,7 +478,7 @@ namespace rtengine {
|
||||
}//end of tile row
|
||||
}//end of tile loop
|
||||
|
||||
|
||||
//TODO: is memcpy multithreaded? If not, should this be replaced with an OpenMP-parallelized for loop?
|
||||
//copy denoised image to output
|
||||
memcpy (dst->data, dsttmp->data, 3*imwidth*imheight*sizeof(float));
|
||||
|
||||
@@ -483,6 +501,9 @@ namespace rtengine {
|
||||
|
||||
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int n=0; n<TS*TS; n++) { //for DCT
|
||||
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
||||
}//output neighbor averaged result
|
||||
@@ -503,6 +524,9 @@ namespace rtengine {
|
||||
const int numblox_W = ceil(((float)(width))/(offset));
|
||||
const float DCTnorm = 1.0f/(4*TS*TS); //for DCT
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
//add row of tiles to output image
|
||||
for (int hblk=0; hblk < numblox_W; hblk++) {
|
||||
int left = (hblk-blkrad)*offset;
|
||||
@@ -544,6 +568,9 @@ namespace rtengine {
|
||||
for (int i=0; i<65536; i++) histo[i]=0;
|
||||
|
||||
//calculate histogram of absolute values of HH wavelet coeffs
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<datalen; i++) {
|
||||
histo[MAX(0,MIN(65535,abs((int)DataList[i])))]++;
|
||||
}
|
||||
@@ -744,6 +771,9 @@ namespace rtengine {
|
||||
{
|
||||
int maxlvl = WaveletCoeffs_L.maxlevel();
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||
|
||||
int Wlvl_L = WaveletCoeffs_L.level_W(lvl);
|
||||
@@ -780,7 +810,9 @@ namespace rtengine {
|
||||
int max;
|
||||
|
||||
printf("\n level=%d \n",level);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int dir=1; dir<4; dir++) {
|
||||
float madL = SQR(MadMax(WavCoeffs_L[dir], max, W_L*H_L));
|
||||
float mada = SQR(MadMax(WavCoeffs_a[dir], max, W_ab*H_ab));
|
||||
@@ -795,6 +827,10 @@ namespace rtengine {
|
||||
|
||||
if (noisevar_ab>0.01) {
|
||||
//OpenMP here
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<H_ab; i++) {
|
||||
for (int j=0; j<W_ab; j++) {
|
||||
|
||||
@@ -818,6 +854,10 @@ namespace rtengine {
|
||||
|
||||
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||
//MK
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<H_ab; i++)
|
||||
for (int j=0; j<W_ab; j++) {
|
||||
|
||||
@@ -840,6 +880,9 @@ namespace rtengine {
|
||||
|
||||
if (noisevar_L>0.01) {
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {
|
||||
|
||||
float mag = SQR(WavCoeffs_L[dir][i]);
|
||||
@@ -850,6 +893,9 @@ namespace rtengine {
|
||||
}
|
||||
//OpenMP here
|
||||
boxblur(sfave, sfave, level+2, level+2, W_L, H_L);//increase smoothness by locally averaging shrinkage
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {
|
||||
|
||||
|
||||
|
@@ -48,6 +48,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
||||
float* temp = buffer->data;
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
temp[row*H+col] = (float)src[row][col];
|
||||
@@ -55,6 +58,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = (float)src[row][0]/len;
|
||||
@@ -76,6 +82,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row][col] = temp[row*W+col];
|
||||
@@ -83,6 +92,9 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0][col] = temp[0*W+col]/len;
|
||||
@@ -127,6 +139,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = (float)src[row*W+0]/len;
|
||||
@@ -148,6 +163,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row*W+col] = temp[row*W+col];
|
||||
@@ -155,6 +173,9 @@ template<class T, class A> void boxblur (T* src, A* dst, int radx, int rady, int
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0*W+col] = temp[0*W+col]/len;
|
||||
@@ -282,6 +303,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
float* tempave = buffer2->data;
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
temp[row*H+col] = src[row*W+col];
|
||||
@@ -289,6 +313,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = (float)src[row*W+0]/len;
|
||||
@@ -311,12 +338,18 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
|
||||
if (rady==0) {
|
||||
for (int row=0; row<H; row++)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col=0; col<H; col++) {
|
||||
tempave[row*W+col] = temp[row*W+col];
|
||||
}
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
tempave[0*W+col] = temp[0*W+col]/len;
|
||||
@@ -342,6 +375,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
temp[row*H+col] = fabs(src[row*W+col]-tempave[row*W+col]);
|
||||
@@ -349,6 +385,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = fabs(src[row*W+0]-tempave[row*W+0])/len;
|
||||
@@ -371,6 +410,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row*W+col] = temp[row*W+col];
|
||||
@@ -378,6 +420,9 @@ template<typename T> void boxdev (T* src, T* dst, int radx, int rady, int W, int
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0*W+col] = temp[0*W+col]/len;
|
||||
@@ -416,6 +461,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
||||
float* temp = buffer->data;
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
temp[row*H+col] = SQR(src[row*W+col]);
|
||||
@@ -423,6 +471,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = SQR((float)src[row*W+0])/len;
|
||||
@@ -444,6 +495,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row*W+col] = temp[row*W+col];
|
||||
@@ -451,6 +505,9 @@ template<class T, class A> void boxsqblur (T* src, A* dst, int radx, int rady, i
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0*W+col] = temp[0*W+col]/len;
|
||||
@@ -488,6 +545,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
||||
float* temp = buffer->data;
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++) {
|
||||
int rr = min(H-1,max(0,row+dy));
|
||||
for (int col=0; col<H; col++) {
|
||||
@@ -498,6 +558,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
int rr = min(H-1,max(0,row+dy));
|
||||
@@ -527,6 +590,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row*W+col] = temp[row*W+col];
|
||||
@@ -534,6 +600,9 @@ template<class T, class A> void boxcorrelate (T* src, A* dst, int dx, int dy, in
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0*W+col] = temp[0*W+col]/len;
|
||||
@@ -572,6 +641,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
||||
float* temp = buffer->data;
|
||||
|
||||
if (radx==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
temp[row*H+col] = fabs(src[row*W+col]);
|
||||
@@ -579,6 +651,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
||||
} else {
|
||||
//horizontal blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row*W+0] = fabs((float)src[row*W+0])/len;
|
||||
@@ -600,6 +675,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
||||
}
|
||||
|
||||
if (rady==0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int row=0; row<H; row++)
|
||||
for (int col=0; col<H; col++) {
|
||||
dst[row*W+col] = temp[row*W+col];
|
||||
@@ -607,6 +685,9 @@ template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady,
|
||||
} else {
|
||||
//vertical blur
|
||||
//OpenMP here
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0*W+col] = temp[0*W+col]/len;
|
||||
|
Reference in New Issue
Block a user