Speedup for log-loop in tmo_fattal02

2017-11-05 16:25:13 +01:00
parent d7136fc668
commit 59043cc978
1 changed files with 35 additions and 4 deletions
--- a/rtengine/tmo_fattal02.cc
+++ b/rtengine/tmo_fattal02.cc
@@ -69,8 +69,10 @@
 #include "improcfun.h"
 #include "settings.h"
 #include "iccstore.h"
-
-
+#define BENCHMARK
+#include "StopWatch.h"
+#include "sleef.c"
+#include "opthelper.h"
 namespace rtengine {

 /******************************************************************************
@@ -426,6 +428,7 @@ void tmo_fattal02(size_t width,
                  int detail_level,
                  bool multithread)
 {
+    BENCHFUN
 // #ifdef TIMER_PROFILING
 //     msec_timer stop_watch;
 //     stop_watch.start();
@@ -471,10 +474,35 @@ void tmo_fattal02(size_t width,
  }
  Array2Df* H = new Array2Df(width, height);
  //#pragma omp parallel for private(i) shared(H, Y, maxLum)
-  for ( int i=0 ; i<size ; i++ )
+  StopWatch Stop1("logf");
+  float temp = 100.f / maxLum;
+  #pragma omp parallel
  {
-      (*H)(i) = logf( 100.0f* Y(i)/maxLum + 1e-4 );
+#ifdef __SSE2__
+  vfloat epsv = F2V(1e-4);
+  vfloat tempv = F2V(temp);
+#endif
+  #pragma omp for schedule(dynamic,16)
+  for ( size_t i=0 ; i<height ; i++ ) {
+      size_t j = 0;
+#ifdef __SSE2__
+      for(; j < width - 3; j+=4)
+      {
+          STVFU((*H)[i][j], xlogf(tempv * LVFU(Y[i][j]) + epsv));
+      }
+#endif
+      for(; j < width; j++)
+      {
+          (*H)[i][j] = xlogf( temp * Y[i][j] + 1e-4 );
+      }
  }
+  }
+//  #pragma omp parallel for
+//  for ( int i=0 ; i<size ; i++ )
+//  {
+//      (*H)(i) = xlogf( temp * Y(i) + 1e-4 );
+//  }
+  Stop1.stop();
  // ph.setValue(4);

  /** RT - this is also here to reduce the dependency of the results on the
@@ -893,6 +921,7 @@ std::vector<double> get_lambda(int n)
 void solve_pde_fft(Array2Df *F, Array2Df *U, bool multithread)/*, pfs::Progress &ph,
                                              bool adjust_bound)*/
 {
+BENCHFUN
   // ph.setValue(20);
  //DEBUG_STR << "solve_pde_fft: solving Laplace U = F ..." << std::endl;
  int width = F->getCols();
@@ -1059,6 +1088,7 @@ void rescale_bilinear(const Array2Df &src, Array2Df &dst, bool multithread)

 void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb)
 {
+    BENCHFUN
    const int detail_level = 3;

    float alpha = 1.f;
@@ -1094,6 +1124,7 @@ void ImProcFunctions::ToneMapFattal02(Imagefloat *rgb)

    // median filter on the deep shadows, to avoid boosting noise
    {
+        StopWatch Stop1("Median");
        const float luminance_noise_floor = 65.535f; // 0.1% -- is this ok?
        
 #ifdef _OPENMP