diff --git a/rtengine/tmo_fattal02.cc b/rtengine/tmo_fattal02.cc index 939ceeff8..851a88884 100644 --- a/rtengine/tmo_fattal02.cc +++ b/rtengine/tmo_fattal02.cc @@ -171,8 +171,6 @@ void gaussianBlur(const Array2Df& I, Array2Df& L) const int width = I.getCols(); const int height = I.getRows(); - Array2Df T(width,height); - if (width < 3 || height < 3) { if (&I != &L) { for (int i = 0, n = width*height; i < n; ++i) { @@ -182,8 +180,10 @@ void gaussianBlur(const Array2Df& I, Array2Df& L) return; } + Array2Df T(width,height); + //--- X blur - //#pragma omp parallel for shared(I, T) + #pragma omp parallel for shared(I, T) for ( int y=0 ; y maxLum ) ? Y(i) : maxLum; + maxLum = std::max(maxLum, Y(i)); } - Array2Df* H = new Array2Df(width, height); + Array2Df* H = new Array2Df(width, height); float temp = 100.f / maxLum; float eps = 1e-4f; #pragma omp parallel @@ -652,71 +669,50 @@ void tmo_fattal02(size_t width, // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero // Neumann conditions assume U(-1)=U(0)), see also divergence calculation // if (fftsolver) - for ( size_t y=0 ; y= height ? height-2 : y+1); for ( size_t x=0 ; x= height ? height-2 : y+1); unsigned int xp1 = (x+1 >= width ? width-2 : x+1); // forward differences in H, so need to use between-points approx of FI - (*Gx)(x,y) = ((*H)(xp1,y)-(*H)(x,y)) * 0.5*((*FI)(xp1,y)+(*FI)(x,y)); - (*Gy)(x,y) = ((*H)(x,yp1)-(*H)(x,y)) * 0.5*((*FI)(x,yp1)+(*FI)(x,y)); + (*Gx)(x,y) = ((*H)(xp1,y)-(*H)(x,y)) * 0.5 * ((*FI)(xp1,y)+(*FI)(x,y)); + (*Gy)(x,y) = ((*H)(x,yp1)-(*H)(x,y)) * 0.5 * ((*FI)(x,yp1)+(*FI)(x,y)); } - // else - // for ( size_t y=0 ; y 0 ) DivG(x,y) -= (*Gx)(x-1,y); - if ( y > 0 ) DivG(x,y) -= (*Gy)(x,y-1); + (*FI)(x,y) = (*Gx)(x,y) + (*Gy)(x,y); + if ( x > 0 ) (*FI)(x,y) -= (*Gx)(x-1,y); + if ( y > 0 ) (*FI)(x,y) -= (*Gy)(x,y-1); // if (fftsolver) { - if (x==0) DivG(x,y) += (*Gx)(x,y); - if (y==0) DivG(x,y) += (*Gy)(x,y); + if (x==0) (*FI)(x,y) += (*Gx)(x,y); + if (y==0) (*FI)(x,y) += (*Gy)(x,y); } } } delete Gx; delete Gy; - // ph.setValue(20); - // if (ph.canceled()) - // { - // return; - // } - -// dumpPFS( "DivG.pfs", DivG, "Y" ); // solve pde and exponentiate (ie recover compressed image) { - Array2Df U(width, height); // if (fftsolver) { MyMutex::MyLock lock(*fftwMutex); - solve_pde_fft(&DivG, &U, multithread);//, ph); + solve_pde_fft(FI, &L, multithread);//, ph); } + delete FI; // else // { // solve_pde_multigrid(&DivG, &U, ph); @@ -729,7 +725,6 @@ void tmo_fattal02(size_t width, // { // return; // } - #pragma omp parallel { #ifdef __SSE2__ @@ -740,15 +735,14 @@ void tmo_fattal02(size_t width, size_t j = 0; #ifdef __SSE2__ for(; j < width - 3; j+=4) { - STVFU(L[i][j], xexpf(gammav * LVFU(U[i][j]))); + STVFU(L[i][j], xexpf(gammav * LVFU(L[i][j]))); } #endif for(; j < width; j++) { - L[i][j] = xexpf( gamma * U[i][j]); + L[i][j] = xexpf( gamma * L[i][j]); } } } - } // ph.setValue(95); @@ -757,22 +751,15 @@ void tmo_fattal02(size_t width, float cut_max = 1.0f - 0.01f * white_point; assert(cut_min>=0.0f && (cut_max<=1.0f) && (cut_min 1.0 - } -// #ifdef TIMER_PROFILING -// stop_watch.stop_and_update(); -// cout << endl; -// cout << "tmo_fattal02 = " << stop_watch.get_time() << " msec" << endl; -// #endif + float dividor = (maxLum - minLum); - // ph.setValue(96); +#pragma omp parallel for + for (size_t i = 0; i < height; ++i) { + for (size_t j = 0; j < width; ++j) { + L[i][j] = std::max((L[i][j] - minLum) / dividor, 0.f); + // note, we intentionally do not cut off values > 1.0 + } + } } @@ -856,6 +843,7 @@ void transform_ev2normal(Array2Df *A, Array2Df *T) // the discrete cosine transform is not exactly the transform needed // need to scale input values to get the right transformation + #pragma omp parallel for for(int y=1 ; y l1=get_lambda(height); std::vector l2=get_lambda(width); + +#pragma omp parallel for for(int y=0 ; y d = { d1/128 * 65, @@ -1238,18 +1226,17 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) if (alpha <= 0 || beta <= 0) { return; } - + int w = rgb->getWidth(); int h = rgb->getHeight(); Array2Df Yr(w, h); - Array2Df L(w, h); const float epsilon = 1e-4f; const float luminance_noise_floor = 65.535f; const float min_luminance = 1.f; TMatrix ws = ICCStore::getInstance()->workingSpaceMatrix(params->icm.working); - + #ifdef _OPENMP #pragma omp parallel for if (multiThread) #endif @@ -1258,7 +1245,6 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) Yr(x, y) = std::max(luminance(rgb->r(y, x), rgb->g(y, x), rgb->b(y, x), ws), min_luminance); // clip really black pixels } } - // median filter on the deep shadows, to avoid boosting noise { #ifdef _OPENMP @@ -1280,6 +1266,7 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) Median_Denoise(Yr, Yr, luminance_noise_floor, w, h, med, 1, num_threads); } + float noise = alpha * 0.01f; if (settings->verbose) { @@ -1287,34 +1274,38 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb) << ", detail_level = " << detail_level << std::endl; } - //tmo_fattal02(w, h, Yr, L, alpha, beta, noise, detail_level, multiThread); - { int w2 = find_fast_dim(w) + 1; int h2 = find_fast_dim(h) + 1; Array2Df buf(w2, h2); rescale_nearest(Yr, buf, multiThread); tmo_fattal02(w2, h2, buf, buf, alpha, beta, noise, detail_level, multiThread); + Array2Df L(w, h); rescale_nearest(buf, L, multiThread); - } +// tmo_fattal02(w, h, Yr, L, alpha, beta, noise, detail_level, multiThread); + +StopWatch Stopx("second Last part"); #ifdef _OPENMP #pragma omp parallel for if(multiThread) #endif for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { float Y = Yr(x, y); - float l = std::max(L(x, y), epsilon); - rgb->r(y, x) = std::max(rgb->r(y, x)/Y, 0.f) * l; - rgb->g(y, x) = std::max(rgb->g(y, x)/Y, 0.f) * l; - rgb->b(y, x) = std::max(rgb->b(y, x)/Y, 0.f) * l; + float l = std::max(L(x, y), epsilon) * (65535.f / Y); + rgb->r(y, x) = std::max(rgb->r(y, x), 0.f) * l; + rgb->g(y, x) = std::max(rgb->g(y, x), 0.f) * l; + rgb->b(y, x) = std::max(rgb->b(y, x), 0.f) * l; assert(std::isfinite(rgb->r(y, x))); assert(std::isfinite(rgb->g(y, x))); assert(std::isfinite(rgb->b(y, x))); } } +Stopx.stop(); +StopWatch Stopy("Last part"); - rgb->normalizeFloatTo65535(); +// rgb->normalizeFloatTo65535(); +Stopy.stop(); }