From a633f3d233551d6ae3f59d273ac0ea910ce9c938 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Thu, 7 Dec 2017 22:51:19 +0100 Subject: [PATCH] fattal: optimized memory usage --- rtengine/tmo_fattal02.cc | 163 ++++++++++----------------------------- 1 file changed, 39 insertions(+), 124 deletions(-) diff --git a/rtengine/tmo_fattal02.cc b/rtengine/tmo_fattal02.cc index 4af176cd2..571f2e430 100644 --- a/rtengine/tmo_fattal02.cc +++ b/rtengine/tmo_fattal02.cc @@ -69,7 +69,7 @@ #include "improcfun.h" #include "settings.h" #include "iccstore.h" -//#define BENCHMARK +#define BENCHMARK #include "StopWatch.h" #include "sleef.c" #include "opthelper.h" @@ -233,18 +233,12 @@ void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread) } } -void createGaussianPyramids ( Array2Df* H, Array2Df** pyramids, int nlevels, bool multithread) +void createGaussianPyramids (Array2Df** pyramids, int nlevels, bool multithread) { - int width = H->getCols(); - int height = H->getRows(); - const int size = width * height; + // pyramids[0] is already set + int width = pyramids[0]->getCols(); + int height = pyramids[0]->getRows(); - pyramids[0] = new Array2Df (width, height); - -//#pragma omp parallel for shared(pyramids, H) - for ( int i = 0 ; i < size ; i++ ) { - (*pyramids[0]) (i) = (*H) (i); - } Array2Df* L = new Array2Df (width, height); gaussianBlur ( *pyramids[0], *L, multithread ); @@ -531,77 +525,54 @@ void tmo_fattal02 (size_t width, /** RT */ - // create gaussian pyramids - // int mins = (width= MSIZE ) - // { - // nlevels++; - // mins /= 2; - // } - // // std::cout << "DEBUG: nlevels = " << nlevels << ", mins = " << mins << std::endl; - // // The following lines solves a bug with images particularly small - // if (nlevels == 0) nlevels = 1; const int nlevels = 7; // RT -- see above - Array2Df** pyramids = new Array2Df*[nlevels]; - createGaussianPyramids (H, pyramids, nlevels, multithread); - // ph.setValue(8); + Array2Df* pyramids[nlevels]; + pyramids[0] = H; + createGaussianPyramids (pyramids, nlevels, multithread); // calculate gradients and its average values on pyramid levels - Array2Df** gradients = new Array2Df*[nlevels]; - float* avgGrad = new float[nlevels]; + Array2Df* gradients[nlevels]; + float avgGrad[nlevels]; for ( int k = 0 ; k < nlevels ; k++ ) { gradients[k] = new Array2Df (pyramids[k]->getCols(), pyramids[k]->getRows()); avgGrad[k] = calculateGradients (pyramids[k], gradients[k], k, multithread); - delete pyramids[k]; + if(k != 0) // pyramids[0] is H. Will be deleted later + delete pyramids[k]; } - delete[] pyramids; - // ph.setValue(12); // calculate fi matrix Array2Df* FI = new Array2Df (width, height); calculateFiMatrix (FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise, multithread); -// dumpPFS( "FI.pfs", FI, "Y" ); for ( int i = 0 ; i < nlevels ; i++ ) { delete gradients[i]; } - delete[] gradients; - delete[] avgGrad; - // ph.setValue(16); - // if (ph.canceled()){ - // delete FI; - // delete H; - // return; - // } - /** - RT - bring back the FI image to the input size if it was downscaled */ if (fullH) { + delete H; + H = fullH; Array2Df *FI2 = new Array2Df (fullwidth, fullheight); rescale_bilinear (*FI, *FI2, multithread); delete FI; FI = FI2; width = fullwidth; height = fullheight; - delete H; - H = fullH; } /** RT */ // attenuate gradients Array2Df* Gx = new Array2Df (width, height); - Array2Df* Gy = new Array2Df (width, height); + Array2Df* Gy = &L; // use L as buffer for Gy // the fft solver solves the Poisson pde but with slightly different // boundary conditions, so we need to adjust the assembly of the right hand // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero // Neumann conditions assume U(-1)=U(0)), see also divergence calculation - // if (fftsolver) #pragma omp parallel for if(multithread) for ( size_t y = 0 ; y < height ; y++ ) { @@ -634,68 +605,49 @@ void tmo_fattal02 (size_t width, (*FI) (x, y) -= (*Gy) (x, y - 1); } - // if (fftsolver) - { - if (x == 0) { - (*FI) (x, y) += (*Gx) (x, y); - } + if (x == 0) { + (*FI) (x, y) += (*Gx) (x, y); + } - if (y == 0) { - (*FI) (x, y) += (*Gy) (x, y); - } + if (y == 0) { + (*FI) (x, y) += (*Gy) (x, y); } } } //delete Gx; // RT - reused as temp buffer in solve_pde_fft, deleted later - delete Gy; // solve pde and exponentiate (ie recover compressed image) { - // if (fftsolver) - { - MyMutex::MyLock lock (*fftwMutex); - solve_pde_fft (FI, &L, Gx, multithread); //, ph); - } - delete Gx; - delete FI; - // else - // { - // solve_pde_multigrid(&DivG, &U, ph); - // } -// #ifndef NDEBUG -// printf("\npde residual error: %f\n", residual_pde(&U, &DivG)); -// #endif - // ph.setValue(90); - // if ( ph.canceled() ) - // { - // return; - // } - #pragma omp parallel if(multithread) - { + MyMutex::MyLock lock (*fftwMutex); + solve_pde_fft (FI, &L, Gx, multithread); + } + delete Gx; + delete FI; + + #pragma omp parallel if(multithread) + { #ifdef __SSE2__ - vfloat gammav = F2V (gamma); + vfloat gammav = F2V (gamma); #endif - #pragma omp for schedule(dynamic,16) + #pragma omp for schedule(dynamic,16) - for (size_t i = 0 ; i < height ; i++) { - size_t j = 0; + for (size_t i = 0 ; i < height ; i++) { + size_t j = 0; #ifdef __SSE2__ - for (; j < width - 3; j += 4) { - STVFU (L[i][j], xexpf (gammav * LVFU (L[i][j]))); - } + for (; j < width - 3; j += 4) { + STVFU (L[i][j], xexpf (gammav * LVFU (L[i][j]))); + } #endif - for (; j < width; j++) { - L[i][j] = xexpf ( gamma * L[i][j]); - } + for (; j < width; j++) { + L[i][j] = xexpf ( gamma * L[i][j]); } } } - // ph.setValue(95); // remove percentile of min and max values and renormalize float cut_min = 0.01f * black_point; @@ -759,31 +711,6 @@ void tmo_fattal02 (size_t width, // atimes(). This means the assembly of the right hand side F is different // for both solvers. -// #include - -// #include - -// #include -// #include -// #include "arch/math.h" -// #include -// #ifdef _OPENMP -// #include -// #endif -// #include -// #include - -// #include "Libpfs/progress.h" -// #include "Libpfs/array2d.h" -// #include "pde.h" - -// using namespace std; - - -// #ifndef SQR -// #define SQR(x) (x)*(x) -// #endif - // returns T = EVy A EVx^tr // note, modifies input data @@ -955,23 +882,13 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/* // transforms F into eigenvector space: Ftr = //DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl; - Array2Df* F_tr = buf; //new Array2Df(width,height); + Array2Df* F_tr = buf; transform_normal2ev (F, F_tr, multithread); // TODO: F no longer needed so could release memory, but as it is an // input parameter we won't do that - // ph.setValue(50); - // if (ph.canceled()) - // { - // delete F_tr; - // return; - // } - //DEBUG_STR << "solve_pde_fft: F_tr(0,0) = " << (*F_tr)(0,0); - //DEBUG_STR << " (must be 0 for solution to exist)" << std::endl; // in the eigenvector space the solution is very simple - //DEBUG_STR << "solve_pde_fft: solve in eigenvector space" << std::endl; -// Array2Df* U_tr = new Array2Df(width,height); std::vector l1 = get_lambda (height); std::vector l2 = get_lambda (width); @@ -985,10 +902,8 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/* (*F_tr) (0, 0) = 0.f; // any value ok, only adds a const to the solution - // transforms U_tr back to the normal space - //DEBUG_STR << "solve_pde_fft: transform U_tr to normal space (fft)" << std::endl; + // transforms F_tr back to the normal space transform_ev2normal (F_tr, U, multithread); -// delete F_tr; // no longer needed so release memory // the solution U as calculated will satisfy something like int U = 0 // since for any constant c, U-c is also a solution and we are mainly