From 856ecbab68c70eb851ad1a2e4f5520c7d4cff970 Mon Sep 17 00:00:00 2001 From: michael Date: Wed, 4 Jul 2012 18:49:09 -0400 Subject: [PATCH] OMP optimizations for NR --- rtdata/profiles/Default-ISO-Medium.pp3 | 2 +- rtengine/EdgePreserveLab.cc | 13 +++- rtengine/FTblockDN.cc | 64 +++++++++++++++++--- rtengine/boxblur.h | 83 +++++++++++++++++++++++++- 4 files changed, 149 insertions(+), 13 deletions(-) diff --git a/rtdata/profiles/Default-ISO-Medium.pp3 b/rtdata/profiles/Default-ISO-Medium.pp3 index 81e63a560..03f2ad4cf 100644 --- a/rtdata/profiles/Default-ISO-Medium.pp3 +++ b/rtdata/profiles/Default-ISO-Medium.pp3 @@ -80,7 +80,7 @@ Temperature=5745 Green=1.0 [Impulse Denoising] -Enabled=true +Enabled=false Threshold=50 [Defringing] diff --git a/rtengine/EdgePreserveLab.cc b/rtengine/EdgePreserveLab.cc index ad7723b49..967555a94 100644 --- a/rtengine/EdgePreserveLab.cc +++ b/rtengine/EdgePreserveLab.cc @@ -2,6 +2,10 @@ #include "boxblur.h" #include +#ifdef _OPENMP +#include +#endif + //#define MAX(a,b) ((a)<(b)?(b):(a)) //#define MIN(a,b) ((a)>(b)?(b):(a)) @@ -62,9 +66,12 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f float * var = new float[w*h]; rtengine::boxvar(g, var, 1, 1, w, h); - for(y = 0; y != h1; y++){ +#ifdef _OPENMP +#pragma omp parallel for +#endif + for(y = 0; y < h1; y++){ float *rg = &g[w*y]; - for(x = 0; x != w1; x++){ + for(x = 0; x < w1; x++){ //Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient. /*float gx = (fabs((rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w]))); float gy = (fabs((rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1]))); @@ -103,6 +110,8 @@ float *EdgePreserveLab::CreateBlur(float *Source, float LScale, float abScale, f memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float)); memset(a_w, 0, A->DiagonalLength(w)*sizeof(float)); memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float)); + +//TODO: OMP here? for(i = y = 0; y != h; y++){ for(x = 0; x != w; x++, i++){ float ac; diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index 0746cf534..975fb3892 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -98,6 +98,9 @@ namespace rtengine { const short int imheight=src->height, imwidth=src->width; if (dnparams.luma==0 && dnparams.chroma==0) {//nothing to do; copy src to dst +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int i=0; ir[i][j] = src->r[i][j]; @@ -144,7 +147,10 @@ namespace rtengine { array2D tilemask_out(TS,TS); const int border = MAX(2,TS/16); - + +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int i=0; iTS/2 ? i-TS+1 : i)); float vmask = (i1data[n] = 0; } @@ -196,7 +205,6 @@ namespace rtengine { //now we have tile dimensions, overlaps //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - for (int tiletop=0; tiletop totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks +// OMP candidate? //fill tile from image; convert RGB to "luma/chroma" for (int i=tiletop, i1=0; idata, dsttmp->data, 3*imwidth*imheight*sizeof(float)); @@ -482,7 +500,10 @@ namespace rtengine { int blkstart = hblproc*TS*TS; boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT - + +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int n=0; n0.01) { -//OpenMP here +//OpenMP here + +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int i=0; i0.01) { //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int i=0; i void boxblur (T** src, A** dst, int radx, int rady, i float* temp = buffer->data; if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxblur (T** src, A** dst, int radx, int rady, i } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = (float)src[row][0]/len; @@ -76,6 +82,9 @@ template void boxblur (T** src, A** dst, int radx, int rady, i } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxblur (T** src, A** dst, int radx, int rady, i } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0][col] = temp[0*W+col]/len; @@ -127,6 +139,9 @@ template void boxblur (T* src, A* dst, int radx, int rady, int } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = (float)src[row*W+0]/len; @@ -148,6 +163,9 @@ template void boxblur (T* src, A* dst, int radx, int rady, int } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxblur (T* src, A* dst, int radx, int rady, int } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -282,6 +303,9 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int float* tempave = buffer2->data; if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = (float)src[row*W+0]/len; @@ -311,12 +338,18 @@ template void boxdev (T* src, T* dst, int radx, int rady, int W, int if (rady==0) { for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxdev (T* src, T* dst, int radx, int rady, int W, int } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -416,6 +461,9 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i float* temp = buffer->data; if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxsqblur (T* src, A* dst, int radx, int rady, i } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = SQR((float)src[row*W+0])/len; @@ -444,6 +495,9 @@ template void boxsqblur (T* src, A* dst, int radx, int rady, i } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxsqblur (T* src, A* dst, int radx, int rady, i } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -488,6 +545,9 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in float* temp = buffer->data; if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxcorrelate (T* src, A* dst, int dx, int dy, in } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; int rr = min(H-1,max(0,row+dy)); @@ -527,6 +590,9 @@ template void boxcorrelate (T* src, A* dst, int dx, int dy, in } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxcorrelate (T* src, A* dst, int dx, int dy, in } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len; @@ -572,6 +641,9 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, float* temp = buffer->data; if (radx==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, } else { //horizontal blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row = 0; row < H; row++) { int len = radx + 1; temp[row*W+0] = fabs((float)src[row*W+0])/len; @@ -600,6 +675,9 @@ template void boxabsblur (T* src, A* dst, int radx, int rady, } if (rady==0) { +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int row=0; row void boxabsblur (T* src, A* dst, int radx, int rady, } else { //vertical blur //OpenMP here +#ifdef _OPENMP +#pragma omp parallel for +#endif for (int col = 0; col < W; col++) { int len = rady + 1; dst[0*W+col] = temp[0*W+col]/len;