diff --git a/rtengine/tmo_fattal02.cc b/rtengine/tmo_fattal02.cc index d537cce3f..6e4b45ccb 100644 --- a/rtengine/tmo_fattal02.cc +++ b/rtengine/tmo_fattal02.cc @@ -3,7 +3,7 @@ * This file is part of RawTherapee. * * Ported from LuminanceHDR by Alberto Griggio - * + * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or @@ -73,7 +73,8 @@ #include "StopWatch.h" #include "sleef.c" #include "opthelper.h" -namespace rtengine { +namespace rtengine +{ /****************************************************************************** * RT code @@ -84,66 +85,68 @@ extern MyMutex *fftwMutex; using namespace std; -namespace { +namespace +{ -class Array2Df: public array2D { +class Array2Df: public array2D +{ typedef array2D Super; public: Array2Df(): Super() {} - Array2Df(int w, int h): Super(w, h) {} + Array2Df (int w, int h): Super (w, h) {} - float &operator()(int w, int h) + float &operator() (int w, int h) { return (*this)[h][w]; } - const float &operator()(int w, int h) const + const float &operator() (int w, int h) const { return (*this)[h][w]; } - float &operator()(int i) + float &operator() (int i) { - return static_cast(*this)[i]; + return static_cast (*this)[i]; } - const float &operator()(int i) const + const float &operator() (int i) const { - return const_cast(*this).operator()(i); + return const_cast (*this).operator() (i); } int getRows() const { - return const_cast(*this).height(); + return const_cast (*this).height(); } int getCols() const { - return const_cast(*this).width(); + return const_cast (*this).width(); } float *data() { - return static_cast(*this); + return static_cast (*this); } const float *data() const { - return const_cast(*this).data(); + return const_cast (*this).data(); } }; // upper bound on image dimension used in tmo_fattal02 -- see the comment there const int RT_dimension_cap = 1920; -void rescale_bilinear(const Array2Df &src, Array2Df &dst, bool multithread); +void rescale_bilinear (const Array2Df &src, Array2Df &dst, bool multithread); /****************************************************************************** * Luminance HDR code (modifications are marked with an RT comment) ******************************************************************************/ -void downSample(const Array2Df& A, Array2Df& B) +void downSample (const Array2Df& A, Array2Df& B) { const int width = B.getCols(); const int height = B.getRows(); @@ -153,163 +156,164 @@ void downSample(const Array2Df& A, Array2Df& B) // speed improvements. The main issue is the pde solver and in case of the // fft solver uses optimised threaded fftw routines. //#pragma omp parallel for - for ( int y=0 ; ygetCols(); - int height = H->getRows(); - const int size = width*height; + int width = H->getCols(); + int height = H->getRows(); + const int size = width * height; + + pyramids[0] = new Array2Df (width, height); - pyramids[0] = new Array2Df(width,height); //#pragma omp parallel for shared(pyramids, H) - for( int i=0 ; i 2 && height > 2) { - width /= 2; - height /= 2; - pyramids[k] = new Array2Df(width,height); - downSample(*L, *pyramids[k]); - } else { - // RT - now nlevels is fixed in tmo_fattal02 (see the comment in - // there), so it might happen that we have to add some padding to - // the gaussian pyramids - pyramids[k] = new Array2Df(width,height); - for (int j = 0, n = width*height; j < n; ++j) { - (*pyramids[k])(j) = (*L)(j); - } - } - - if(k < nlevels -1) { - delete L; - L = new Array2Df(width,height); - gaussianBlur( *pyramids[k], *L ); + for ( int i = 0 ; i < size ; i++ ) { + (*pyramids[0]) (i) = (*H) (i); } - } - delete L; + Array2Df* L = new Array2Df (width, height); + gaussianBlur ( *pyramids[0], *L ); + + for ( int k = 1 ; k < nlevels ; k++ ) { + if (width > 2 && height > 2) { + width /= 2; + height /= 2; + pyramids[k] = new Array2Df (width, height); + downSample (*L, *pyramids[k]); + } else { + // RT - now nlevels is fixed in tmo_fattal02 (see the comment in + // there), so it might happen that we have to add some padding to + // the gaussian pyramids + pyramids[k] = new Array2Df (width, height); + + for (int j = 0, n = width * height; j < n; ++j) { + (*pyramids[k]) (j) = (*L) (j); + } + } + + if (k < nlevels - 1) { + delete L; + L = new Array2Df (width, height); + gaussianBlur ( *pyramids[k], *L ); + } + } + + delete L; } //-------------------------------------------------------------------- -float calculateGradients(Array2Df* H, Array2Df* G, int k) +float calculateGradients (Array2Df* H, Array2Df* G, int k) { - const int width = H->getCols(); - const int height = H->getRows(); - const float divider = pow( 2.0f, k+1 ); - float avgGrad = 0.0f; + const int width = H->getCols(); + const int height = H->getRows(); + const float divider = pow ( 2.0f, k + 1 ); + float avgGrad = 0.0f; -#pragma omp parallel for reduction(+:avgGrad) - for( int y=0 ; y(x * 0.5f); //x / 2.f; - int ay = static_cast(y * 0.5f); //y / 2.f; - ax = (ax (x * 0.5f); //x / 2.f; + int ay = static_cast (y * 0.5f); //y / 2.f; + ax = (ax < awidth) ? ax : awidth - 1; + ay = (ay < aheight) ? ay : aheight - 1; - B(x,y) = A(ax,ay); + B (x, y) = A (ax, ay); } } + //--- this code below produces 'use of uninitialized value error' // int width = A->getCols(); // int height = A->getRows(); @@ -345,84 +348,78 @@ void upSample(const Array2Df& A, Array2Df& B) } -void calculateFiMatrix(Array2Df* FI, Array2Df* gradients[], - float avgGrad[], int nlevels, int detail_level, - float alfa, float beta, float noise) +void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[], + float avgGrad[], int nlevels, int detail_level, + float alfa, float beta, float noise) { const bool newfattal = true; - int width = gradients[nlevels-1]->getCols(); - int height = gradients[nlevels-1]->getRows(); + int width = gradients[nlevels - 1]->getCols(); + int height = gradients[nlevels - 1]->getRows(); Array2Df** fi = new Array2Df*[nlevels]; - fi[nlevels-1] = new Array2Df(width,height); - if (newfattal) - { + fi[nlevels - 1] = new Array2Df (width, height); + + if (newfattal) { //#pragma omp parallel for shared(fi) - for ( int k = 0 ; k < width*height ; k++ ) - { - (*fi[nlevels-1])(k) = 1.0f; + for ( int k = 0 ; k < width * height ; k++ ) { + (*fi[nlevels - 1]) (k) = 1.0f; } } - for ( int k = nlevels-1; k >= 0 ; k-- ) - { + for ( int k = nlevels - 1; k >= 0 ; k-- ) { width = gradients[k]->getCols(); height = gradients[k]->getRows(); // only apply gradients to levels>=detail_level but at least to the coarsest if ( k >= detail_level - ||k==nlevels-1 - || newfattal == false) - { + || k == nlevels - 1 + || newfattal == false) { //DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl; #pragma omp parallel for shared(fi,avgGrad) - for ( int y = 0; y < height; y++ ) - { - for ( int x = 0; x < width; x++ ) - { - float grad = ((*gradients[k])(x,y) < 1e-4f) ? 1e-4 : (*gradients[k])(x,y); + for ( int y = 0; y < height; y++ ) { + for ( int x = 0; x < width; x++ ) { + float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y); float a = alfa * avgGrad[k]; - float value = pow((grad+noise)/a, beta - 1.0f); + float value = pow ((grad + noise) / a, beta - 1.0f); - if (newfattal) - (*fi[k])(x,y) *= value; - else - (*fi[k])(x,y) = value; + if (newfattal) { + (*fi[k]) (x, y) *= value; + } else { + (*fi[k]) (x, y) = value; + } } } } // create next level - if ( k>1 ) - { - width = gradients[k-1]->getCols(); - height = gradients[k-1]->getRows(); - fi[k-1] = new Array2Df(width,height); + if ( k > 1 ) { + width = gradients[k - 1]->getCols(); + height = gradients[k - 1]->getRows(); + fi[k - 1] = new Array2Df (width, height); + } else { + fi[0] = FI; // highest level -> result } - else - fi[0] = FI; // highest level -> result - if ( k>0 && newfattal ) - { - upSample(*fi[k], *fi[k-1]); // upsample to next level - gaussianBlur(*fi[k-1], *fi[k-1]); + if ( k > 0 && newfattal ) { + upSample (*fi[k], *fi[k - 1]); // upsample to next level + gaussianBlur (*fi[k - 1], *fi[k - 1]); } } - for ( int k=1 ; k 0) { // interpolate + + if (k > 0) { // interpolate int count_ = count - histo[k - 1]; float c0 = count - minPrct * size; float c1 = minPrct * size - count_; minLum = (c1 * k + c0 * (k - 1)) / ((c0 + c1) * 65535.f); } else { - minLum = k /65535.f; + minLum = k / 65535.f; } // find (maxPrct*size) smallest value - while(count < maxPrct*size) { + while (count < maxPrct * size) { count += histo[k++]; } - if(k > 0) { // interpolate + + if (k > 0) { // interpolate int count_ = count - histo[k - 1]; float c0 = count - maxPrct * size; float c1 = maxPrct * size - count_; maxLum = (c1 * k + c0 * (k - 1)) / ((c0 + c1) * 65535.f); } else { - maxLum = k /65535.f; + maxLum = k / 65535.f; } } -void solve_pde_fft(Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread); +void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread); -void tmo_fattal02(size_t width, - size_t height, - const Array2Df& Y, - Array2Df& L, - float alfa, - float beta, - float noise, - int detail_level, - bool multithread) +void tmo_fattal02 (size_t width, + size_t height, + const Array2Df& Y, + Array2Df& L, + float alfa, + float beta, + float noise, + int detail_level, + bool multithread) { // #ifdef TIMER_PROFILING // msec_timer stop_watch; @@ -507,12 +507,18 @@ void tmo_fattal02(size_t width, static const float black_point = 0.1f; static const float white_point = 0.5f; static const float gamma = 1.0f; // 0.8f; - // static const int detail_level = 3; - if ( detail_level < 0 ) detail_level = 0; - if ( detail_level > 3 ) detail_level = 3; - // ph.setValue(2); - // if (ph.canceled()) return; + // static const int detail_level = 3; + if ( detail_level < 0 ) { + detail_level = 0; + } + + if ( detail_level > 3 ) { + detail_level = 3; + } + + // ph.setValue(2); + // if (ph.canceled()) return; /* RT -- we use a hardcoded value for nlevels, to limit the * dependency of the result on the image size. When using an auto computed @@ -520,247 +526,274 @@ void tmo_fattal02(size_t width, * image sizes, making it essentially impossible to preview the tool * inside RT. With a hardcoded value, the results for the preview are much * closer to those for the final image */ - // int MSIZE = 32; // minimum size of gaussian pyramid - // // I believe a smaller value than 32 results in slightly better overall - // // quality but I'm only applying this if the newly implemented fft solver - // // is used in order not to change behaviour of the old version - // // TODO: best let the user decide this value - // // if (fftsolver) - // { - // MSIZE = 8; - // } - + // int MSIZE = 32; // minimum size of gaussian pyramid + // // I believe a smaller value than 32 results in slightly better overall + // // quality but I'm only applying this if the newly implemented fft solver + // // is used in order not to change behaviour of the old version + // // TODO: best let the user decide this value + // // if (fftsolver) + // { + // MSIZE = 8; + // } - int size = width*height; - // find max value, normalize to range 0..100 and take logarithm - float minLum = Y(0,0); - float maxLum = Y(0,0); + int size = width * height; - #pragma omp parallel for reduction(max:maxLum) - for ( int i=0 ; i RT_dimension_cap) { - float s = float(RT_dimension_cap) / float(dim); - Array2Df *HH = new Array2Df(width * s, height * s); - rescale_bilinear(*H, *HH, multithread); - fullH = H; - H = HH; - width = H->getCols(); - height = H->getRows(); - } - /** RT */ - - // create gaussian pyramids - // int mins = (width= MSIZE ) - // { - // nlevels++; - // mins /= 2; - // } - // // std::cout << "DEBUG: nlevels = " << nlevels << ", mins = " << mins << std::endl; - // // The following lines solves a bug with images particularly small - // if (nlevels == 0) nlevels = 1; - const int nlevels = 7; // RT -- see above - - Array2Df** pyramids = new Array2Df*[nlevels]; - createGaussianPyramids(H, pyramids, nlevels); - // ph.setValue(8); - - // calculate gradients and its average values on pyramid levels - Array2Df** gradients = new Array2Df*[nlevels]; - float* avgGrad = new float[nlevels]; - for ( int k=0 ; kgetCols(), pyramids[k]->getRows()); - avgGrad[k] = calculateGradients(pyramids[k],gradients[k], k); - delete pyramids[k]; - } - delete[] pyramids; - // ph.setValue(12); - - // calculate fi matrix - Array2Df* FI = new Array2Df(width, height); - calculateFiMatrix(FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise); -// dumpPFS( "FI.pfs", FI, "Y" ); - for ( int i=0 ; i= height ? height-2 : y+1); - for ( size_t x=0 ; x= width ? width-2 : x+1); - // forward differences in H, so need to use between-points approx of FI - (*Gx)(x,y) = ((*H)(xp1,y)-(*H)(x,y)) * 0.5 * ((*FI)(xp1,y)+(*FI)(x,y)); - (*Gy)(x,y) = ((*H)(x,yp1)-(*H)(x,y)) * 0.5 * ((*FI)(x,yp1)+(*FI)(x,y)); - } + for ( int i = 0 ; i < size ; i++ ) { + maxLum = std::max (maxLum, Y (i)); } - delete H; - // calculate divergence -#pragma omp parallel for - for ( size_t y = 0; y < height; ++y ) - { - for ( size_t x = 0; x < width; ++x ) - { - (*FI)(x,y) = (*Gx)(x,y) + (*Gy)(x,y); - if ( x > 0 ) (*FI)(x,y) -= (*Gx)(x-1,y); - if ( y > 0 ) (*FI)(x,y) -= (*Gy)(x,y-1); + Array2Df* H = new Array2Df (width, height); + float temp = 100.f / maxLum; + float eps = 1e-4f; + #pragma omp parallel + { +#ifdef __SSE2__ + vfloat epsv = F2V (eps); + vfloat tempv = F2V (temp); +#endif + #pragma omp for schedule(dynamic,16) - // if (fftsolver) - { - if (x==0) (*FI)(x,y) += (*Gx)(x,y); - if (y==0) (*FI)(x,y) += (*Gy)(x,y); - } + for (size_t i = 0 ; i < height ; ++i) { + size_t j = 0; +#ifdef __SSE2__ - } - } - //delete Gx; // RT - reused as temp buffer in solve_pde_fft, deleted later - delete Gy; + for (; j < width - 3; j += 4) { + STVFU ((*H)[i][j], xlogf (tempv * LVFU (Y[i][j]) + epsv)); + } - // solve pde and exponentiate (ie recover compressed image) - { - // if (fftsolver) - { - MyMutex::MyLock lock(*fftwMutex); - solve_pde_fft(FI, &L, Gx, multithread);//, ph); - } - delete Gx; - delete FI; - // else - // { - // solve_pde_multigrid(&DivG, &U, ph); - // } +#endif + + for (; j < width; ++j) { + (*H)[i][j] = xlogf (temp * Y[i][j] + eps); + } + } + } + + /** RT - this is also here to reduce the dependency of the results on the + * input image size, with the primary aim of having a preview in RT that is + * reasonably close to the actual output image. Intuitively, what we do is + * to put a cap on the dimension of the image processed, so that it is close + * in size to the typical preview that you will see on a normal consumer + * monitor. (That's where the 1920 value for RT_dimension_cap comes from.) + * However, we can't simply downscale the input Y array and then upscale it + * on output, because that would cause a big loss of sharpness (confirmed by + * testing). + * So, we use a different method: we downscale the H array, so that we + * compute a downscaled gaussian pyramid and a downscaled FI matrix. Then, + * we upscale the FI matrix later on, before it gets combined with the + * original input luminance array H. This seems to preserve the input + * sharpness and at the same time significantly reduce the dependency of the + * result on the input size. Clearly this is a hack, and keep in mind that I + * do not really know how Fattal works (it comes from LuminanceHDR almost + * verbatim), so this should probably be revised/reviewed by someone who + * knows better... also, we use a quite naive bilinear interpolation + * algorithm (see rescale_bilinear below), which could definitely be + * improved */ + int fullwidth = width; + int fullheight = height; + int dim = std::max (width, height); + Array2Df *fullH = nullptr; + + if (dim > RT_dimension_cap) { + float s = float (RT_dimension_cap) / float (dim); + Array2Df *HH = new Array2Df (width * s, height * s); + rescale_bilinear (*H, *HH, multithread); + fullH = H; + H = HH; + width = H->getCols(); + height = H->getRows(); + } + + /** RT */ + + // create gaussian pyramids + // int mins = (width= MSIZE ) + // { + // nlevels++; + // mins /= 2; + // } + // // std::cout << "DEBUG: nlevels = " << nlevels << ", mins = " << mins << std::endl; + // // The following lines solves a bug with images particularly small + // if (nlevels == 0) nlevels = 1; + const int nlevels = 7; // RT -- see above + + Array2Df** pyramids = new Array2Df*[nlevels]; + createGaussianPyramids (H, pyramids, nlevels); + // ph.setValue(8); + + // calculate gradients and its average values on pyramid levels + Array2Df** gradients = new Array2Df*[nlevels]; + float* avgGrad = new float[nlevels]; + + for ( int k = 0 ; k < nlevels ; k++ ) { + gradients[k] = new Array2Df (pyramids[k]->getCols(), pyramids[k]->getRows()); + avgGrad[k] = calculateGradients (pyramids[k], gradients[k], k); + delete pyramids[k]; + } + + delete[] pyramids; + // ph.setValue(12); + + // calculate fi matrix + Array2Df* FI = new Array2Df (width, height); + calculateFiMatrix (FI, gradients, avgGrad, nlevels, detail_level, alfa, beta, noise); + +// dumpPFS( "FI.pfs", FI, "Y" ); + for ( int i = 0 ; i < nlevels ; i++ ) { + delete gradients[i]; + } + + delete[] gradients; + delete[] avgGrad; + // ph.setValue(16); + // if (ph.canceled()){ + // delete FI; + // delete H; + // return; + // } + + /** - RT - bring back the FI image to the input size if it was downscaled */ + if (fullH) { + Array2Df *FI2 = new Array2Df (fullwidth, fullheight); + rescale_bilinear (*FI, *FI2, multithread); + delete FI; + FI = FI2; + width = fullwidth; + height = fullheight; + delete H; + H = fullH; + } + + /** RT */ + + // attenuate gradients + Array2Df* Gx = new Array2Df (width, height); + Array2Df* Gy = new Array2Df (width, height); + + // the fft solver solves the Poisson pde but with slightly different + // boundary conditions, so we need to adjust the assembly of the right hand + // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero + // Neumann conditions assume U(-1)=U(0)), see also divergence calculation + // if (fftsolver) + #pragma omp parallel for + + for ( size_t y = 0 ; y < height ; y++ ) { + // sets index+1 based on the boundary assumption H(N+1)=H(N-1) + unsigned int yp1 = (y + 1 >= height ? height - 2 : y + 1); + + for ( size_t x = 0 ; x < width ; x++ ) { + // sets index+1 based on the boundary assumption H(N+1)=H(N-1) + unsigned int xp1 = (x + 1 >= width ? width - 2 : x + 1); + // forward differences in H, so need to use between-points approx of FI + (*Gx) (x, y) = ((*H) (xp1, y) - (*H) (x, y)) * 0.5 * ((*FI) (xp1, y) + (*FI) (x, y)); + (*Gy) (x, y) = ((*H) (x, yp1) - (*H) (x, y)) * 0.5 * ((*FI) (x, yp1) + (*FI) (x, y)); + } + } + + delete H; + + // calculate divergence + #pragma omp parallel for + + for ( size_t y = 0; y < height; ++y ) { + for ( size_t x = 0; x < width; ++x ) { + (*FI) (x, y) = (*Gx) (x, y) + (*Gy) (x, y); + + if ( x > 0 ) { + (*FI) (x, y) -= (*Gx) (x - 1, y); + } + + if ( y > 0 ) { + (*FI) (x, y) -= (*Gy) (x, y - 1); + } + + // if (fftsolver) + { + if (x == 0) { + (*FI) (x, y) += (*Gx) (x, y); + } + + if (y == 0) { + (*FI) (x, y) += (*Gy) (x, y); + } + } + + } + } + + //delete Gx; // RT - reused as temp buffer in solve_pde_fft, deleted later + delete Gy; + + // solve pde and exponentiate (ie recover compressed image) + { + // if (fftsolver) + { + MyMutex::MyLock lock (*fftwMutex); + solve_pde_fft (FI, &L, Gx, multithread); //, ph); + } + delete Gx; + delete FI; + // else + // { + // solve_pde_multigrid(&DivG, &U, ph); + // } // #ifndef NDEBUG // printf("\npde residual error: %f\n", residual_pde(&U, &DivG)); // #endif - // ph.setValue(90); - // if ( ph.canceled() ) - // { - // return; - // } - #pragma omp parallel - { + // ph.setValue(90); + // if ( ph.canceled() ) + // { + // return; + // } + #pragma omp parallel + { #ifdef __SSE2__ - vfloat gammav = F2V(gamma); + vfloat gammav = F2V (gamma); #endif - #pragma omp for schedule(dynamic,16) - for (size_t i=0 ; i=0.0f && (cut_max<=1.0f) && (cut_min 1.0 + for (; j < width; j++) { + L[i][j] = xexpf ( gamma * L[i][j]); + } + } + } + } + // ph.setValue(95); + + // remove percentile of min and max values and renormalize + float cut_min = 0.01f * black_point; + float cut_max = 1.0f - 0.01f * white_point; + assert (cut_min >= 0.0f && (cut_max <= 1.0f) && (cut_min < cut_max)); + findMaxMinPercentile (L, cut_min, minLum, cut_max, maxLum); + float dividor = (maxLum - minLum); + + #pragma omp parallel for + + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + L[i][j] = std::max ((L[i][j] - minLum) / dividor, 0.f); + // note, we intentionally do not cut off values > 1.0 + } } - } } @@ -836,91 +869,94 @@ void tmo_fattal02(size_t width, // returns T = EVy A EVx^tr // note, modifies input data -void transform_ev2normal(Array2Df *A, Array2Df *T) +void transform_ev2normal (Array2Df *A, Array2Df *T) { - int width = A->getCols(); - int height = A->getRows(); - assert((int)T->getCols()==width && (int)T->getRows()==height); + int width = A->getCols(); + int height = A->getRows(); + assert ((int)T->getCols() == width && (int)T->getRows() == height); - // the discrete cosine transform is not exactly the transform needed - // need to scale input values to get the right transformation - #pragma omp parallel for - for(int y=1 ; ydata(), T->data(), - FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE); - fftwf_execute(p); - fftwf_destroy_plan(p); + for (int y = 1 ; y < height - 1 ; y++ ) { + (*A) (0, y) *= 0.5; + (*A) (width - 1, y) *= 0.5f; + } + + // note, fftw provides its own memory allocation routines which + // ensure that memory is properly 16/32 byte aligned so it can + // use SSE/AVX operations (2/4 double ops in parallel), if our + // data is not properly aligned fftw won't use SSE/AVX + // (I believe new() aligns memory to 16 byte so avoid overhead here) + // + // double* in = (double*) fftwf_malloc(sizeof(double) * width*height); + // fftwf_free(in); + + // executes 2d discrete cosine transform + fftwf_plan p; + p = fftwf_plan_r2r_2d (height, width, A->data(), T->data(), + FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE); + fftwf_execute (p); + fftwf_destroy_plan (p); } // returns T = EVy^-1 * A * (EVx^-1)^tr -void transform_normal2ev(Array2Df *A, Array2Df *T) +void transform_normal2ev (Array2Df *A, Array2Df *T) { - int width = A->getCols(); - int height = A->getRows(); - assert((int)T->getCols()==width && (int)T->getRows()==height); + int width = A->getCols(); + int height = A->getRows(); + assert ((int)T->getCols() == width && (int)T->getRows() == height); - // executes 2d discrete cosine transform - fftwf_plan p; - p=fftwf_plan_r2r_2d(height, width, A->data(), T->data(), - FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE); - fftwf_execute(p); - fftwf_destroy_plan(p); + // executes 2d discrete cosine transform + fftwf_plan p; + p = fftwf_plan_r2r_2d (height, width, A->data(), T->data(), + FFTW_REDFT00, FFTW_REDFT00, FFTW_ESTIMATE); + fftwf_execute (p); + fftwf_destroy_plan (p); - // need to scale the output matrix to get the right transform - float factor = (1.0f/((height-1)*(width-1))); -#pragma omp parallel for - for(int y=0 ; y get_lambda(int n) +std::vector get_lambda (int n) { - assert(n>1); - std::vector v(n); - for (int i=0; i 1); + std::vector v (n); - return v; + for (int i = 0; i < n; i++) { + v[i] = -4.0 * SQR (sin ((double)i / (2 * (n - 1)) * RT_PI)); + } + + return v; } // // makes boundary conditions compatible so that a solution exists @@ -967,99 +1003,105 @@ std::vector get_lambda(int n) // not modified and the equation might not have a solution but an // approximate solution with a minimum error is then calculated // double precision version -void solve_pde_fft(Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*, pfs::Progress &ph, +void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*, pfs::Progress &ph, bool adjust_bound)*/ { - // ph.setValue(20); - //DEBUG_STR << "solve_pde_fft: solving Laplace U = F ..." << std::endl; - int width = F->getCols(); - int height = F->getRows(); - assert((int)U->getCols()==width && (int)U->getRows()==height); - assert(buf->getCols()==width && buf->getRows()==height); + // ph.setValue(20); + //DEBUG_STR << "solve_pde_fft: solving Laplace U = F ..." << std::endl; + int width = F->getCols(); + int height = F->getRows(); + assert ((int)U->getCols() == width && (int)U->getRows() == height); + assert (buf->getCols() == width && buf->getRows() == height); - // activate parallel execution of fft routines + // activate parallel execution of fft routines #ifdef RT_FFTW3F_OMP - if (multithread) { - fftwf_init_threads(); - fftwf_plan_with_nthreads( omp_get_max_threads() ); - } + + if (multithread) { + fftwf_init_threads(); + fftwf_plan_with_nthreads ( omp_get_max_threads() ); + } + // #else // fftwf_plan_with_nthreads( 2 ); #endif - // in general there might not be a solution to the Poisson pde - // with Neumann boundary conditions unless the boundary satisfies - // an integral condition, this function modifies the boundary so that - // the condition is exactly satisfied - // if(adjust_bound) - // { - // //DEBUG_STR << "solve_pde_fft: checking boundary conditions" << std::endl; - // make_compatible_boundary(F); - // } + // in general there might not be a solution to the Poisson pde + // with Neumann boundary conditions unless the boundary satisfies + // an integral condition, this function modifies the boundary so that + // the condition is exactly satisfied + // if(adjust_bound) + // { + // //DEBUG_STR << "solve_pde_fft: checking boundary conditions" << std::endl; + // make_compatible_boundary(F); + // } - // transforms F into eigenvector space: Ftr = - //DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl; - Array2Df* F_tr = buf; //new Array2Df(width,height); - transform_normal2ev(F, F_tr); - // TODO: F no longer needed so could release memory, but as it is an - // input parameter we won't do that - // ph.setValue(50); - // if (ph.canceled()) - // { - // delete F_tr; - // return; - // } + // transforms F into eigenvector space: Ftr = + //DEBUG_STR << "solve_pde_fft: transform F to ev space (fft)" << std::endl; + Array2Df* F_tr = buf; //new Array2Df(width,height); + transform_normal2ev (F, F_tr); + // TODO: F no longer needed so could release memory, but as it is an + // input parameter we won't do that + // ph.setValue(50); + // if (ph.canceled()) + // { + // delete F_tr; + // return; + // } - //DEBUG_STR << "solve_pde_fft: F_tr(0,0) = " << (*F_tr)(0,0); - //DEBUG_STR << " (must be 0 for solution to exist)" << std::endl; + //DEBUG_STR << "solve_pde_fft: F_tr(0,0) = " << (*F_tr)(0,0); + //DEBUG_STR << " (must be 0 for solution to exist)" << std::endl; - // in the eigenvector space the solution is very simple - //DEBUG_STR << "solve_pde_fft: solve in eigenvector space" << std::endl; + // in the eigenvector space the solution is very simple + //DEBUG_STR << "solve_pde_fft: solve in eigenvector space" << std::endl; // Array2Df* U_tr = new Array2Df(width,height); - std::vector l1=get_lambda(height); - std::vector l2=get_lambda(width); + std::vector l1 = get_lambda (height); + std::vector l2 = get_lambda (width); -#pragma omp parallel for - for(int y=0 ; y 0); + assert (dim > 0); unsigned int v = dim; v--; v |= v >> 1; @@ -1172,7 +1218,7 @@ inline int round_up_pow2(int dim) return v; } -inline int find_fast_dim(int dim) +inline int find_fast_dim (int dim) { // as per the FFTW docs: // @@ -1183,39 +1229,42 @@ inline int find_fast_dim(int dim) // the above form. This is not exhaustive, but should be ok for pictures // up to 100MPix at least - int d1 = round_up_pow2(dim); + int d1 = round_up_pow2 (dim); std::vector d = { - d1/128 * 65, - d1/64 * 33, - d1/512 * 273, - d1/16 * 9, - d1/8 * 5, - d1/16 * 11, - d1/128 * 91, - d1/4 * 3, - d1/64 * 49, - d1/16 * 13, - d1/8 * 7, + d1 / 128 * 65, + d1 / 64 * 33, + d1 / 512 * 273, + d1 / 16 * 9, + d1 / 8 * 5, + d1 / 16 * 11, + d1 / 128 * 91, + d1 / 4 * 3, + d1 / 64 * 49, + d1 / 16 * 13, + d1 / 8 * 7, d1 }; + for (size_t i = 0; i < d.size(); ++i) { if (d[i] >= dim) { return d[i]; } } - assert(false); + + assert (false); return dim; } } // namespace -void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) +void ImProcFunctions::ToneMapFattal02 (Imagefloat *rgb) { BENCHFUN const int detail_level = 3; float alpha = 1.f; + if (params->fattal.threshold < 0) { alpha += (params->fattal.threshold * 0.9f) / 100.f; } else if (params->fattal.threshold > 0) { @@ -1223,7 +1272,7 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) } float beta = 1.f - (params->fattal.amount * 0.3f) / 100.f; - + // sanity check if (alpha <= 0 || beta <= 0) { return; @@ -1231,35 +1280,38 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) int w = rgb->getWidth(); int h = rgb->getHeight(); - - Array2Df Yr(w, h); + + Array2Df Yr (w, h); const float epsilon = 1e-4f; const float luminance_noise_floor = 65.535f; const float min_luminance = 1.f; - TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.working); + TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix (params->icm.working); #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif + for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { - Yr(x, y) = std::max(luminance(rgb->r(y, x), rgb->g(y, x), rgb->b(y, x), ws), min_luminance); // clip really black pixels + Yr (x, y) = std::max (luminance (rgb->r (y, x), rgb->g (y, x), rgb->b (y, x), ws), min_luminance); // clip really black pixels } } + // median filter on the deep shadows, to avoid boosting noise // because w2 >= w and h2 >= h, we can use the L buffer as temporary buffer for Median_Denoise() - int w2 = find_fast_dim(w) + 1; - int h2 = find_fast_dim(h) + 1; - Array2Df L(w2, h2); + int w2 = find_fast_dim (w) + 1; + int h2 = find_fast_dim (h) + 1; + Array2Df L (w2, h2); { #ifdef _OPENMP int num_threads = multiThread ? omp_get_max_threads() : 1; #else int num_threads = 1; #endif - float r = float(std::max(w, h)) / float(RT_dimension_cap); + float r = float (std::max (w, h)) / float (RT_dimension_cap); Median med; + if (r >= 3) { med = Median::TYPE_7X7; } else if (r >= 2) { @@ -1269,7 +1321,8 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) } else { med = Median::TYPE_3X3_STRONG; } - Median_Denoise(Yr, Yr, luminance_noise_floor, w, h, med, 1, num_threads, L); + + Median_Denoise (Yr, Yr, luminance_noise_floor, w, h, med, 1, num_threads, L); } float noise = alpha * 0.01f; @@ -1279,27 +1332,29 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) << ", detail_level = " << detail_level << std::endl; } - rescale_nearest(Yr, L, multiThread); - tmo_fattal02(w2, h2, L, L, alpha, beta, noise, detail_level, multiThread); + rescale_nearest (Yr, L, multiThread); + tmo_fattal02 (w2, h2, L, L, alpha, beta, noise, detail_level, multiThread); // tmo_fattal02(w, h, Yr, L, alpha, beta, noise, detail_level, multiThread); #ifdef _OPENMP #pragma omp parallel for if(multiThread) #endif + for (int y = 0; y < h; y++) { int yy = y * h2 / h; + for (int x = 0; x < w; x++) { int xx = x * w2 / w; - float Y = Yr(x, y); - float l = std::max(L(xx, yy), epsilon) * (65535.f / Y); - rgb->r(y, x) = std::max(rgb->r(y, x), 0.f) * l; - rgb->g(y, x) = std::max(rgb->g(y, x), 0.f) * l; - rgb->b(y, x) = std::max(rgb->b(y, x), 0.f) * l; - - assert(std::isfinite(rgb->r(y, x))); - assert(std::isfinite(rgb->g(y, x))); - assert(std::isfinite(rgb->b(y, x))); + float Y = Yr (x, y); + float l = std::max (L (xx, yy), epsilon) * (65535.f / Y); + rgb->r (y, x) = std::max (rgb->r (y, x), 0.f) * l; + rgb->g (y, x) = std::max (rgb->g (y, x), 0.f) * l; + rgb->b (y, x) = std::max (rgb->b (y, x), 0.f) * l; + + assert (std::isfinite (rgb->r (y, x))); + assert (std::isfinite (rgb->g (y, x))); + assert (std::isfinite (rgb->b (y, x))); } } }