OpenMP modifications in FTblockDN.cc (backing out those changes that lead to artifacts).
More work is needed here to utilise AlignedBufferMP.
This commit is contained in:
@@ -205,6 +205,7 @@ namespace rtengine {
|
|||||||
//now we have tile dimensions, overlaps
|
//now we have tile dimensions, overlaps
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
//adding omp here slows it down
|
||||||
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
||||||
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
||||||
|
|
||||||
@@ -222,10 +223,15 @@ namespace rtengine {
|
|||||||
//pixel weight
|
//pixel weight
|
||||||
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
||||||
|
|
||||||
// OMP candidate?
|
//#ifdef _OPENMP
|
||||||
|
//#pragma omp parallel for
|
||||||
|
//#endif
|
||||||
|
//TODO: implement using AlignedBufferMP
|
||||||
//fill tile from image; convert RGB to "luma/chroma"
|
//fill tile from image; convert RGB to "luma/chroma"
|
||||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++)
|
for (int i=tiletop/*, i1=0*/; i<tilebottom; i++/*, i1++*/) {
|
||||||
for (int j=tileleft, j1=0; j<tileright; j++, j1++) {
|
int i1 = i - tiletop;
|
||||||
|
for (int j=tileleft/*, j1=0*/; j<tileright; j++/*, j1++*/) {
|
||||||
|
int j1 = j - tileleft;
|
||||||
|
|
||||||
float X = gain*src->r[i][j];//xyz_prophoto[0][0]*src->r[i][j] + xyz_prophoto[0][1]*src->g[i][j] + xyz_prophoto[0][2]*src->b[i][j];
|
float X = gain*src->r[i][j];//xyz_prophoto[0][0]*src->r[i][j] + xyz_prophoto[0][1]*src->g[i][j] + xyz_prophoto[0][2]*src->b[i][j];
|
||||||
float Y = gain*src->g[i][j];//xyz_prophoto[1][0]*src->r[i][j] + xyz_prophoto[1][1]*src->g[i][j] + xyz_prophoto[1][2]*src->b[i][j];
|
float Y = gain*src->g[i][j];//xyz_prophoto[1][0]*src->r[i][j] + xyz_prophoto[1][1]*src->g[i][j] + xyz_prophoto[1][2]*src->b[i][j];
|
||||||
@@ -243,6 +249,7 @@ namespace rtengine {
|
|||||||
Lin[i1][j1] = Y;
|
Lin[i1][j1] = Y;
|
||||||
totwt[i1][j1] = 0;
|
totwt[i1][j1] = 0;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//initial impulse denoise
|
//initial impulse denoise
|
||||||
if (dnparams.luma>0.01) {
|
if (dnparams.luma>0.01) {
|
||||||
@@ -319,6 +326,7 @@ namespace rtengine {
|
|||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// Main detail recovery algorithm: Block loop
|
// Main detail recovery algorithm: Block loop
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
|
//adding omp here leads to artifacts
|
||||||
for (int vblk=0; vblk<numblox_H; vblk++) {
|
for (int vblk=0; vblk<numblox_H; vblk++) {
|
||||||
//printf("vblock=%d",vblk);
|
//printf("vblock=%d",vblk);
|
||||||
int vblkmod = vblk%8;
|
int vblkmod = vblk%8;
|
||||||
@@ -328,8 +336,12 @@ namespace rtengine {
|
|||||||
float * buffer = new float [width + TS + 2*blkrad*offset];
|
float * buffer = new float [width + TS + 2*blkrad*offset];
|
||||||
float * datarow = buffer+blkrad*offset;
|
float * datarow = buffer+blkrad*offset;
|
||||||
|
|
||||||
for (int i=0, row=top; i<TS; i++, row++) {
|
//#ifdef _OPENMP
|
||||||
|
//#pragma omp parallel for
|
||||||
|
//#endif
|
||||||
|
//TODO: implement using AlignedBufferMP
|
||||||
|
for (int i=0/*, row=top*/; i<TS; i++/*, row++*/) {
|
||||||
|
int row = top + i;
|
||||||
int rr = row;
|
int rr = row;
|
||||||
if (row<0) {
|
if (row<0) {
|
||||||
rr = MIN(-row,height-1);
|
rr = MIN(-row,height-1);
|
||||||
@@ -349,9 +361,7 @@ namespace rtengine {
|
|||||||
}//now we have a padded data row
|
}//now we have a padded data row
|
||||||
|
|
||||||
//now fill this row of the blocks with Lab high pass data
|
//now fill this row of the blocks with Lab high pass data
|
||||||
#ifdef _OPENMP
|
//OMP here does not add speed, better handled on the outside loop
|
||||||
#pragma omp parallel for
|
|
||||||
#endif
|
|
||||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||||
int left = (hblk-blkrad)*offset;
|
int left = (hblk-blkrad)*offset;
|
||||||
int indx = (hblk)*TS;//index of block in malloc
|
int indx = (hblk)*TS;//index of block in malloc
|
||||||
@@ -410,9 +420,10 @@ namespace rtengine {
|
|||||||
fftwf_cleanup();
|
fftwf_cleanup();
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
#ifdef _OPENMP
|
//#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
#endif
|
//#endif
|
||||||
|
//TODO: implement using AlignedBufferMP
|
||||||
for (int i=0; i<height; i++) {
|
for (int i=0; i<height; i++) {
|
||||||
for (int j=0; j<width; j++) {
|
for (int j=0; j<width; j++) {
|
||||||
//may want to include masking threshold for large hipass data to preserve edges/detail
|
//may want to include masking threshold for large hipass data to preserve edges/detail
|
||||||
@@ -444,11 +455,15 @@ namespace rtengine {
|
|||||||
if (tileright<imwidth) Hmask[width-1-i] = mask;
|
if (tileright<imwidth) Hmask[width-1-i] = mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: OMP candidate?
|
#ifdef _OPENMP
|
||||||
|
#pragma omp parallel for
|
||||||
|
#endif
|
||||||
//convert back to RGB and write to destination array
|
//convert back to RGB and write to destination array
|
||||||
for (int i=tiletop, i1=0; i<tilebottom; i++, i1++) {
|
for (int i=tiletop/* i1=0*/; i<tilebottom; i++/*, i1++*/){
|
||||||
|
int i1 = i-tiletop;
|
||||||
float X,Y,Z;
|
float X,Y,Z;
|
||||||
for (int j=tileleft, j1=0; j<tileright; j++, j1++) {
|
for (int j=tileleft/*, j1=0*/; j<tileright; j++/*, j1++*/) {
|
||||||
|
int j1=j-tileleft;
|
||||||
|
|
||||||
Y = labdn->L[i1][j1];
|
Y = labdn->L[i1][j1];
|
||||||
X = (labdn->a[i1][j1]) + Y;
|
X = (labdn->a[i1][j1]) + Y;
|
||||||
@@ -501,9 +516,10 @@ namespace rtengine {
|
|||||||
|
|
||||||
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
||||||
|
|
||||||
#ifdef _OPENMP
|
//#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
#endif
|
//#endif
|
||||||
|
//TODO: implement using AlignedBufferMP
|
||||||
for (int n=0; n<TS*TS; n++) { //for DCT
|
for (int n=0; n<TS*TS; n++) { //for DCT
|
||||||
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
||||||
}//output neighbor averaged result
|
}//output neighbor averaged result
|
||||||
@@ -854,7 +870,7 @@ namespace rtengine {
|
|||||||
|
|
||||||
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
boxblur(sfavea, sfavea, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||||
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
boxblur(sfaveb, sfaveb, level+2, level+2, W_ab, H_ab);//increase smoothness by locally averaging shrinkage
|
||||||
//MK
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user