Performance optimization for impulse_nr on multi-core-systems
This commit is contained in:
parent
0a91af9c12
commit
7918fab562
@ -192,7 +192,58 @@ namespace rtengine {
|
|||||||
//now we have tile dimensions, overlaps
|
//now we have tile dimensions, overlaps
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
//adding omp here slows it down
|
// According to FFTW-Doc 'it is safe to execute the same plan in parallel by multiple threads', so we now create 4 plans
|
||||||
|
// outside the parallel region and use them inside the parallel region.
|
||||||
|
|
||||||
|
// calculate max size of numblox_W.
|
||||||
|
int max_numblox_W = ceil(((float)(MIN(imwidth,tilewidth)))/(offset))+2*blkrad;
|
||||||
|
// calculate min size of numblox_W.
|
||||||
|
int min_numblox_W = ceil(((float)((MIN(imwidth,((numtiles_W - 1) * tileWskip) + tilewidth) ) - ((numtiles_W - 1) * tileWskip)))/(offset))+2*blkrad;
|
||||||
|
|
||||||
|
// these are needed only for creation of the plans and will be freed before entering the parallel loop
|
||||||
|
float * Lbloxtmp;
|
||||||
|
float * fLbloxtmp;
|
||||||
|
Lbloxtmp = fftwf_alloc_real(max_numblox_W*TS*TS);
|
||||||
|
fLbloxtmp = fftwf_alloc_real(max_numblox_W*TS*TS);
|
||||||
|
|
||||||
|
int nfwd[2]={TS,TS};
|
||||||
|
|
||||||
|
//for DCT:
|
||||||
|
const fftw_r2r_kind fwdkind[2] = {FFTW_REDFT10, FFTW_REDFT10};
|
||||||
|
const fftw_r2r_kind bwdkind[2] = {FFTW_REDFT01, FFTW_REDFT01};
|
||||||
|
|
||||||
|
fftwf_plan plan_forward_blox[2];
|
||||||
|
fftwf_plan plan_backward_blox[2];
|
||||||
|
|
||||||
|
// Creating the plans with FFTW_MEASURE instead of FFTW_ESTIMATE speeds up the execute a bit
|
||||||
|
plan_forward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE );
|
||||||
|
plan_backward_blox[0] = fftwf_plan_many_r2r(2, nfwd, max_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE );
|
||||||
|
plan_forward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, Lbloxtmp, NULL, 1, TS*TS, fLbloxtmp, NULL, 1, TS*TS, fwdkind, FFTW_MEASURE );
|
||||||
|
plan_backward_blox[1] = fftwf_plan_many_r2r(2, nfwd, min_numblox_W, fLbloxtmp, NULL, 1, TS*TS, Lbloxtmp, NULL, 1, TS*TS, bwdkind, FFTW_MEASURE );
|
||||||
|
fftwf_free ( Lbloxtmp );
|
||||||
|
fftwf_free ( fLbloxtmp );
|
||||||
|
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// Calculate number of tiles. If less than omp_get_max_threads(), then limit num_threads to number of tiles
|
||||||
|
int numtiles = numtiles_W * numtiles_H;
|
||||||
|
int numthreads = MIN(numtiles,omp_get_max_threads());
|
||||||
|
//if(options.RgbDenoiseThreadLimit > 0) numthreads = MIN(numthreads,options.RgbDenoiseThreadLimit);
|
||||||
|
#pragma omp parallel num_threads(numthreads)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
//DCT block data storage
|
||||||
|
float * Lblox;
|
||||||
|
float * fLblox;
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp critical
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
Lblox = fftwf_alloc_real(max_numblox_W*TS*TS);
|
||||||
|
fLblox = fftwf_alloc_real(max_numblox_W*TS*TS);
|
||||||
|
}
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
|
#endif
|
||||||
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
for (int tiletop=0; tiletop<imheight; tiletop+=tileHskip) {
|
||||||
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
for (int tileleft=0; tileleft<imwidth; tileleft+=tileWskip) {
|
||||||
|
|
||||||
@ -202,13 +253,14 @@ namespace rtengine {
|
|||||||
int height = tilebottom-tiletop;
|
int height = tilebottom-tiletop;
|
||||||
|
|
||||||
//input L channel
|
//input L channel
|
||||||
array2D<float> Lin(width,height,ARRAY2D_CLEAR_DATA);
|
array2D<float> Lin(width,height);
|
||||||
//wavelet denoised image
|
//wavelet denoised image
|
||||||
LabImage * labdn = new LabImage(width,height);
|
LabImage * labdn = new LabImage(width,height);
|
||||||
//residual between input and denoised L channel
|
//residual between input and denoised L channel
|
||||||
array2D<float> Ldetail(width,height,ARRAY2D_CLEAR_DATA);
|
array2D<float> Ldetail(width,height,ARRAY2D_CLEAR_DATA);
|
||||||
//pixel weight
|
//pixel weight
|
||||||
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
array2D<float> totwt(width,height,ARRAY2D_CLEAR_DATA);//weight for combining DCT blocks
|
||||||
|
//
|
||||||
|
|
||||||
//#ifdef _OPENMP
|
//#ifdef _OPENMP
|
||||||
//#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
@ -233,9 +285,9 @@ namespace rtengine {
|
|||||||
labdn->a[i1][j1] = (X-Y);
|
labdn->a[i1][j1] = (X-Y);
|
||||||
labdn->b[i1][j1] = (Y-Z);
|
labdn->b[i1][j1] = (Y-Z);
|
||||||
|
|
||||||
Ldetail[i1][j1] = 0;
|
// Ldetail[i1][j1] = 0;
|
||||||
Lin[i1][j1] = Y;
|
Lin[i1][j1] = Y;
|
||||||
totwt[i1][j1] = 0;
|
// totwt[i1][j1] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {//image is not raw; use Lab parametrization
|
} else {//image is not raw; use Lab parametrization
|
||||||
@ -262,9 +314,9 @@ namespace rtengine {
|
|||||||
labdn->a[i1][j1] = (X-Y);
|
labdn->a[i1][j1] = (X-Y);
|
||||||
labdn->b[i1][j1] = (Y-Z);
|
labdn->b[i1][j1] = (Y-Z);
|
||||||
|
|
||||||
Ldetail[i1][j1] = 0;
|
// Ldetail[i1][j1] = 0;
|
||||||
Lin[i1][j1] = Y;
|
Lin[i1][j1] = Y;
|
||||||
totwt[i1][j1] = 0;
|
// totwt[i1][j1] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -282,21 +334,32 @@ namespace rtengine {
|
|||||||
//and whether to subsample the image after wavelet filtering. Subsampling is coded as
|
//and whether to subsample the image after wavelet filtering. Subsampling is coded as
|
||||||
//binary 1 or 0 for each level, eg subsampling = 0 means no subsampling, 1 means subsample
|
//binary 1 or 0 for each level, eg subsampling = 0 means no subsampling, 1 means subsample
|
||||||
//the first level only, 7 means subsample the first three levels, etc.
|
//the first level only, 7 means subsample the first three levels, etc.
|
||||||
wavelet_decomposition Ldecomp(labdn->data, labdn->W, labdn->H, 5/*maxlevels*/, 0/*subsampling*/ );
|
|
||||||
wavelet_decomposition adecomp(labdn->data+datalen, labdn->W, labdn->H, 5, 1 );
|
|
||||||
wavelet_decomposition bdecomp(labdn->data+2*datalen, labdn->W, labdn->H, 5, 1 );
|
|
||||||
|
|
||||||
float noisevarL = SQR((dnparams.luma/125.0f)*(1+ dnparams.luma/25.0f));
|
float noisevarL = SQR((dnparams.luma/125.0f)*(1+ dnparams.luma/25.0f));
|
||||||
|
|
||||||
float noisevarab = SQR(dnparams.chroma/10.0f);
|
float noisevarab = SQR(dnparams.chroma/10.0f);
|
||||||
|
{ // enclosing this code in a block frees about 120 MB before allocating 20 MB after this block (measured with D700 NEF)
|
||||||
|
wavelet_decomposition* Ldecomp;
|
||||||
|
wavelet_decomposition* adecomp;
|
||||||
|
wavelet_decomposition* bdecomp;
|
||||||
|
|
||||||
|
Ldecomp = new wavelet_decomposition (labdn->data, labdn->W, labdn->H, 5/*maxlevels*/, 0/*subsampling*/ );
|
||||||
|
adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H, 5, 1 );
|
||||||
|
bdecomp = new wavelet_decomposition (labdn->data+2*datalen, labdn->W, labdn->H, 5, 1 );
|
||||||
|
|
||||||
//WaveletDenoiseAll_BiShrink(Ldecomp, adecomp, bdecomp, noisevarL, noisevarab);
|
//WaveletDenoiseAll_BiShrink(Ldecomp, adecomp, bdecomp, noisevarL, noisevarab);
|
||||||
WaveletDenoiseAll(Ldecomp, adecomp, bdecomp, noisevarL, noisevarab);
|
WaveletDenoiseAll(*Ldecomp, *adecomp, *bdecomp, noisevarL, noisevarab);
|
||||||
|
|
||||||
Ldecomp.reconstruct(labdn->data);
|
Ldecomp->reconstruct(labdn->data);
|
||||||
adecomp.reconstruct(labdn->data+datalen);
|
delete Ldecomp;
|
||||||
bdecomp.reconstruct(labdn->data+2*datalen);
|
adecomp->reconstruct(labdn->data+datalen);
|
||||||
|
delete adecomp;
|
||||||
|
bdecomp->reconstruct(labdn->data+2*datalen);
|
||||||
|
delete bdecomp;
|
||||||
|
}
|
||||||
|
|
||||||
//TODO: at this point wavelet coefficients storage can be freed
|
//TODO: at this point wavelet coefficients storage can be freed
|
||||||
|
//Issue 1680: Done now
|
||||||
|
|
||||||
//second impulse denoise
|
//second impulse denoise
|
||||||
if (dnparams.luma>0.01) {
|
if (dnparams.luma>0.01) {
|
||||||
@ -316,49 +379,35 @@ namespace rtengine {
|
|||||||
// blocks are not the same thing as tiles!
|
// blocks are not the same thing as tiles!
|
||||||
|
|
||||||
|
|
||||||
// allocate DCT data structures
|
|
||||||
|
|
||||||
// calculation for detail recovery blocks
|
// calculation for detail recovery blocks
|
||||||
const int numblox_W = ceil(((float)(width))/(offset))+2*blkrad;
|
const int numblox_W = ceil(((float)(width))/(offset))+2*blkrad;
|
||||||
const int numblox_H = ceil(((float)(height))/(offset))+2*blkrad;
|
const int numblox_H = ceil(((float)(height))/(offset))+2*blkrad;
|
||||||
|
|
||||||
//const int nrtiles = numblox_W*numblox_H;
|
//const int nrtiles = numblox_W*numblox_H;
|
||||||
// end of tiling calc
|
// end of tiling calc
|
||||||
|
{
|
||||||
//DCT block data storage
|
|
||||||
float * Lblox = (float *) fftwf_malloc (numblox_W*TS*TS * sizeof (float));
|
|
||||||
float * fLblox = (float *) fftwf_malloc (numblox_W*TS*TS * sizeof (float));
|
|
||||||
|
|
||||||
|
|
||||||
//make a plan for FFTW
|
|
||||||
fftwf_plan plan_forward_blox, plan_backward_blox;
|
|
||||||
|
|
||||||
int nfwd[2]={TS,TS};
|
|
||||||
|
|
||||||
//for DCT:
|
|
||||||
const fftw_r2r_kind fwdkind[2] = {FFTW_REDFT10, FFTW_REDFT10};
|
|
||||||
const fftw_r2r_kind bwdkind[2] = {FFTW_REDFT01, FFTW_REDFT01};
|
|
||||||
|
|
||||||
plan_forward_blox = fftwf_plan_many_r2r(2, nfwd, numblox_W, Lblox, NULL, 1, TS*TS, fLblox, NULL, 1, TS*TS, fwdkind, FFTW_ESTIMATE );
|
|
||||||
plan_backward_blox = fftwf_plan_many_r2r(2, nfwd, numblox_W, fLblox, NULL, 1, TS*TS, Lblox, NULL, 1, TS*TS, bwdkind, FFTW_ESTIMATE );
|
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// Main detail recovery algorithm: Block loop
|
// Main detail recovery algorithm: Block loop
|
||||||
//OpenMP here
|
//OpenMP here
|
||||||
//adding omp here leads to artifacts
|
//adding omp here leads to artifacts
|
||||||
|
AlignedBufferMP<float> buffer(width + TS + 2*blkrad*offset);
|
||||||
for (int vblk=0; vblk<numblox_H; vblk++) {
|
for (int vblk=0; vblk<numblox_H; vblk++) {
|
||||||
//printf("vblock=%d",vblk);
|
//printf("vblock=%d",vblk);
|
||||||
int vblkmod = vblk%8;
|
int vblkmod = vblk%8;
|
||||||
|
|
||||||
int top = (vblk-blkrad)*offset;
|
int top = (vblk-blkrad)*offset;
|
||||||
|
AlignedBuffer<float>* pBuf = buffer.acquire();
|
||||||
float * buffer = new float [width + TS + 2*blkrad*offset];
|
// float * buffer = new float [width + TS + 2*blkrad*offset];
|
||||||
float * datarow = buffer+blkrad*offset;
|
float * datarow = (float*)pBuf->data +blkrad*offset;
|
||||||
|
|
||||||
//#ifdef _OPENMP
|
//#ifdef _OPENMP
|
||||||
//#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
//#endif
|
//#endif
|
||||||
//TODO: implement using AlignedBufferMP
|
//TODO: implement using AlignedBufferMP
|
||||||
|
// #pragma omp parallel for
|
||||||
for (int i=0/*, row=top*/; i<TS; i++/*, row++*/) {
|
for (int i=0/*, row=top*/; i<TS; i++/*, row++*/) {
|
||||||
int row = top + i;
|
int row = top + i;
|
||||||
int rr = row;
|
int rr = row;
|
||||||
@ -393,13 +442,14 @@ namespace rtengine {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}//end of filling block row
|
}//end of filling block row
|
||||||
delete[] buffer;
|
buffer.release(pBuf);
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
//fftwf_print_plan (plan_forward_blox);
|
//fftwf_print_plan (plan_forward_blox);
|
||||||
fftwf_execute_r2r(plan_forward_blox,Lblox,fLblox); // DCT an entire row of tiles
|
if(numblox_W == max_numblox_W)
|
||||||
|
fftwf_execute_r2r(plan_forward_blox[0],Lblox,fLblox); // DCT an entire row of tiles
|
||||||
|
else
|
||||||
|
fftwf_execute_r2r(plan_forward_blox[1],Lblox,fLblox); // DCT an entire row of tiles
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// now process the vblk row of blocks for noise reduction
|
// now process the vblk row of blocks for noise reduction
|
||||||
for (int hblk=0; hblk<numblox_W; hblk++) {
|
for (int hblk=0; hblk<numblox_W; hblk++) {
|
||||||
@ -411,7 +461,10 @@ namespace rtengine {
|
|||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
//now perform inverse FT of an entire row of blocks
|
//now perform inverse FT of an entire row of blocks
|
||||||
fftwf_execute_r2r(plan_backward_blox,fLblox,Lblox); //for DCT
|
if(numblox_W == max_numblox_W)
|
||||||
|
fftwf_execute_r2r(plan_backward_blox[0],fLblox,Lblox); //for DCT
|
||||||
|
else
|
||||||
|
fftwf_execute_r2r(plan_backward_blox[1],fLblox,Lblox); //for DCT
|
||||||
|
|
||||||
int topproc = (vblk-blkrad)*offset;
|
int topproc = (vblk-blkrad)*offset;
|
||||||
|
|
||||||
@ -421,20 +474,9 @@ namespace rtengine {
|
|||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
}//end of vertical block loop
|
}//end of vertical block loop
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
// clean up
|
}
|
||||||
//#pragma omp single nowait
|
|
||||||
fftwf_destroy_plan( plan_forward_blox );
|
|
||||||
//#pragma omp single nowait
|
|
||||||
fftwf_destroy_plan( plan_backward_blox );
|
|
||||||
|
|
||||||
fftwf_free ( Lblox);
|
|
||||||
fftwf_free ( fLblox);
|
|
||||||
|
|
||||||
fftwf_cleanup();
|
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
for (int i=0; i<height; i++) {
|
for (int i=0; i<height; i++) {
|
||||||
for (int j=0; j<width; j++) {
|
for (int j=0; j<width; j++) {
|
||||||
@ -470,7 +512,7 @@ namespace rtengine {
|
|||||||
//convert back to RGB and write to destination array
|
//convert back to RGB and write to destination array
|
||||||
if (isRAW) {
|
if (isRAW) {
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
for (int i=tiletop; i<tilebottom; i++){
|
for (int i=tiletop; i<tilebottom; i++){
|
||||||
int i1 = i-tiletop;
|
int i1 = i-tiletop;
|
||||||
@ -496,7 +538,7 @@ namespace rtengine {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
//#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
for (int i=tiletop; i<tilebottom; i++){
|
for (int i=tiletop; i<tilebottom; i++){
|
||||||
int i1 = i-tiletop;
|
int i1 = i-tiletop;
|
||||||
@ -534,7 +576,15 @@ namespace rtengine {
|
|||||||
|
|
||||||
}//end of tile row
|
}//end of tile row
|
||||||
}//end of tile loop
|
}//end of tile loop
|
||||||
|
#ifdef _OPENMP
|
||||||
|
#pragma omp critical
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
fftwf_free ( Lblox);
|
||||||
|
fftwf_free ( fLblox);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
//copy denoised image to output
|
//copy denoised image to output
|
||||||
memcpy (dst->data, dsttmp->data, 3*dst->width*dst->height*sizeof(float));
|
memcpy (dst->data, dsttmp->data, 3*dst->width*dst->height*sizeof(float));
|
||||||
|
|
||||||
@ -549,6 +599,13 @@ namespace rtengine {
|
|||||||
|
|
||||||
delete dsttmp;
|
delete dsttmp;
|
||||||
|
|
||||||
|
// destroy the plans
|
||||||
|
fftwf_destroy_plan( plan_forward_blox[0] );
|
||||||
|
fftwf_destroy_plan( plan_backward_blox[0] );
|
||||||
|
fftwf_destroy_plan( plan_forward_blox[1] );
|
||||||
|
fftwf_destroy_plan( plan_backward_blox[1] );
|
||||||
|
fftwf_cleanup();
|
||||||
|
|
||||||
}//end of main RGB_denoise
|
}//end of main RGB_denoise
|
||||||
|
|
||||||
|
|
||||||
@ -561,11 +618,10 @@ namespace rtengine {
|
|||||||
void ImProcFunctions::RGBtile_denoise (float * fLblox, int vblproc, int hblproc, int numblox_H, int numblox_W, float noisevar_Ldetail ) //for DCT
|
void ImProcFunctions::RGBtile_denoise (float * fLblox, int vblproc, int hblproc, int numblox_H, int numblox_W, float noisevar_Ldetail ) //for DCT
|
||||||
{
|
{
|
||||||
float * nbrwt = new float[TS*TS]; //for DCT
|
float * nbrwt = new float[TS*TS]; //for DCT
|
||||||
|
|
||||||
int blkstart = hblproc*TS*TS;
|
int blkstart = hblproc*TS*TS;
|
||||||
|
|
||||||
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
boxabsblur(fLblox+blkstart, nbrwt, 3, 3, TS, TS);//blur neighbor weights for more robust estimation //for DCT
|
||||||
|
#pragma omp parallel for
|
||||||
for (int n=0; n<TS*TS; n++) { //for DCT
|
for (int n=0; n<TS*TS; n++) { //for DCT
|
||||||
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
fLblox[blkstart+n] *= (1-expf(-SQR(nbrwt[n])/noisevar_Ldetail));
|
||||||
}//output neighbor averaged result
|
}//output neighbor averaged result
|
||||||
@ -586,19 +642,19 @@ namespace rtengine {
|
|||||||
const int numblox_W = ceil(((float)(width))/(offset));
|
const int numblox_W = ceil(((float)(width))/(offset));
|
||||||
const float DCTnorm = 1.0f/(4*TS*TS); //for DCT
|
const float DCTnorm = 1.0f/(4*TS*TS); //for DCT
|
||||||
|
|
||||||
|
int imin = MAX(0,-top);
|
||||||
|
int bottom = MIN( top+TS,height);
|
||||||
|
int imax = bottom - top;
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
//add row of tiles to output image
|
//add row of tiles to output image
|
||||||
for (int hblk=0; hblk < numblox_W; hblk++) {
|
for (int hblk=0; hblk < numblox_W; hblk++) {
|
||||||
int left = (hblk-blkrad)*offset;
|
int left = (hblk-blkrad)*offset;
|
||||||
int bottom = MIN( top+TS,height);
|
|
||||||
int right = MIN(left+TS, width);
|
int right = MIN(left+TS, width);
|
||||||
int imin = MAX(0,-top);
|
|
||||||
int jmin = MAX(0,-left);
|
int jmin = MAX(0,-left);
|
||||||
int imax = bottom - top;
|
|
||||||
int jmax = right - left;
|
int jmax = right - left;
|
||||||
|
|
||||||
int indx = hblk*TS;
|
int indx = hblk*TS;
|
||||||
|
|
||||||
for (int i=imin; i<imax; i++)
|
for (int i=imin; i<imax; i++)
|
||||||
@ -719,7 +775,7 @@ namespace rtengine {
|
|||||||
//simple wavelet shrinkage
|
//simple wavelet shrinkage
|
||||||
float * sfave = new float[Wlvl_L*Hlvl_L];
|
float * sfave = new float[Wlvl_L*Hlvl_L];
|
||||||
array2D<float> edge(Wlvl_L,Hlvl_L);
|
array2D<float> edge(Wlvl_L,Hlvl_L);
|
||||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (MAX(Wlvl_L,Hlvl_L));
|
AlignedBufferMP<double>* buffer = new AlignedBufferMP<double> (MAX(Wlvl_L,Hlvl_L));
|
||||||
|
|
||||||
//printf("\n level=%d \n",lvl);
|
//printf("\n level=%d \n",lvl);
|
||||||
|
|
||||||
@ -829,7 +885,8 @@ namespace rtengine {
|
|||||||
wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab )
|
wavelet_decomposition &WaveletCoeffs_b, float noisevar_L, float noisevar_ab )
|
||||||
{
|
{
|
||||||
int maxlvl = WaveletCoeffs_L.maxlevel();
|
int maxlvl = WaveletCoeffs_L.maxlevel();
|
||||||
|
// printf("maxlevel = %d\n",maxlvl);
|
||||||
|
//omp_set_nested(true);
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
@ -847,11 +904,12 @@ namespace rtengine {
|
|||||||
float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl);
|
float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(lvl);
|
||||||
float ** WavCoeffs_a = WaveletCoeffs_a.level_coeffs(lvl);
|
float ** WavCoeffs_a = WaveletCoeffs_a.level_coeffs(lvl);
|
||||||
float ** WavCoeffs_b = WaveletCoeffs_b.level_coeffs(lvl);
|
float ** WavCoeffs_b = WaveletCoeffs_b.level_coeffs(lvl);
|
||||||
|
// printf("Hab : %d\n", Hlvl_ab);
|
||||||
|
// printf("Wab : %d\n", Wlvl_ab);
|
||||||
ShrinkAll(WavCoeffs_L, WavCoeffs_a, WavCoeffs_b, lvl, Wlvl_L, Hlvl_L, Wlvl_ab, Hlvl_ab,
|
ShrinkAll(WavCoeffs_L, WavCoeffs_a, WavCoeffs_b, lvl, Wlvl_L, Hlvl_L, Wlvl_ab, Hlvl_ab,
|
||||||
skip_L, skip_ab, noisevar_L, noisevar_ab);
|
skip_L, skip_ab, noisevar_L, noisevar_ab);
|
||||||
}
|
}
|
||||||
|
//omp_set_nested(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
Loading…
x
Reference in New Issue
Block a user