5% speedup for raw false colour suppression

2016-03-13 17:15:31 +01:00
parent 9b2e197172
commit c200c266c0
3 changed files with 25 additions and 18 deletions
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@@ -3392,10 +3392,8 @@ int RawImageSource::defTransform (int tran)
 void RawImageSource::processFalseColorCorrectionThread  (Imagefloat* im, array2D<float> &rbconv_Y, array2D<float> &rbconv_I, array2D<float> &rbconv_Q, array2D<float> &rbout_I, array2D<float> &rbout_Q, const int row_from, const int row_to)
 {

-    int W = im->width;
-
-    float row_I[W];
-    float row_Q[W];
+    const int W = im->width;
+    constexpr float onebynine = 1.f / 9.f;

 #ifdef __SSE2__
    vfloat buffer[12];
@@ -3512,34 +3510,35 @@ void RawImageSource::processFalseColorCorrectionThread  (Imagefloat* im, array2D

        // blur i-1th row
        if (i > row_from) {
+            convert_to_RGB (im->r(i - 1, 0), im->g(i - 1, 0), im->b(i - 1, 0), rbconv_Y[px][0], rbout_I[px][0], rbout_Q[px][0]);
+
 #ifdef _OPENMP
            #pragma omp simd
 #endif

            for (int j = 1; j < W - 1; j++) {
-                row_I[j] = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbout_I[nx][j - 1] + rbout_I[nx][j] + rbout_I[nx][j + 1]) / 9;
-                row_Q[j] = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbout_Q[nx][j - 1] + rbout_Q[nx][j] + rbout_Q[nx][j + 1]) / 9;
+                float I = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbout_I[nx][j - 1] + rbout_I[nx][j] + rbout_I[nx][j + 1]) * onebynine;
+                float Q = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbout_Q[nx][j - 1] + rbout_Q[nx][j] + rbout_Q[nx][j + 1]) * onebynine;
+                convert_to_RGB (im->r(i - 1, j), im->g(i - 1, j), im->b(i - 1, j), rbconv_Y[px][j], I, Q);
            }

-            row_I[0] = rbout_I[px][0];
-            row_Q[0] = rbout_Q[px][0];
-            row_I[W - 1] = rbout_I[px][W - 1];
-            row_Q[W - 1] = rbout_Q[px][W - 1];
-            convert_row_to_RGB (im->r(i - 1), im->g(i - 1), im->b(i - 1), rbconv_Y[px], row_I, row_Q, W);
+            convert_to_RGB (im->r(i - 1, W - 1), im->g(i - 1, W - 1), im->b(i - 1, W - 1), rbconv_Y[px][W - 1], rbout_I[px][W - 1], rbout_Q[px][W - 1]);
        }
    }

    // blur last 3 row and finalize H-1th row
+    convert_to_RGB (im->r(row_to - 1, 0), im->g(row_to - 1, 0), im->b(row_to - 1, 0), rbconv_Y[cx][0], rbout_I[cx][0], rbout_Q[cx][0]);
+#ifdef _OPENMP
+    #pragma omp simd
+#endif
+
    for (int j = 1; j < W - 1; j++) {
-        row_I[j] = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbconv_I[nx][j - 1] + rbconv_I[nx][j] + rbconv_I[nx][j + 1]) / 9;
-        row_Q[j] = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbconv_Q[nx][j - 1] + rbconv_Q[nx][j] + rbconv_Q[nx][j + 1]) / 9;
+        float I = (rbout_I[px][j - 1] + rbout_I[px][j] + rbout_I[px][j + 1] + rbout_I[cx][j - 1] + rbout_I[cx][j] + rbout_I[cx][j + 1] + rbconv_I[nx][j - 1] + rbconv_I[nx][j] + rbconv_I[nx][j + 1]) * onebynine;
+        float Q = (rbout_Q[px][j - 1] + rbout_Q[px][j] + rbout_Q[px][j + 1] + rbout_Q[cx][j - 1] + rbout_Q[cx][j] + rbout_Q[cx][j + 1] + rbconv_Q[nx][j - 1] + rbconv_Q[nx][j] + rbconv_Q[nx][j + 1]) * onebynine;
+        convert_to_RGB (im->r(row_to - 1, j), im->g(row_to - 1, j), im->b(row_to - 1, j), rbconv_Y[cx][j], I, Q);
    }

-    row_I[0] = rbout_I[cx][0];
-    row_Q[0] = rbout_Q[cx][0];
-    row_I[W - 1] = rbout_I[cx][W - 1];
-    row_Q[W - 1] = rbout_Q[cx][W - 1];
-    convert_row_to_RGB (im->r(row_to - 1), im->g(row_to - 1), im->b(row_to - 1), rbconv_Y[cx], row_I, row_Q, W);
+    convert_to_RGB (im->r(row_to - 1, W - 1), im->g(row_to - 1, W - 1), im->b(row_to - 1, W - 1), rbconv_Y[cx][W - 1], rbout_I[cx][W - 1], rbout_Q[cx][W - 1]);
 }

 //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
--- a/rtengine/rawimagesource.h
+++ b/rtengine/rawimagesource.h
@@ -209,6 +209,7 @@ protected:
    void processFalseColorCorrection (Imagefloat* i, const int steps);
    inline  void convert_row_to_YIQ (const float* const r, const float* const g, const float* const b, float* Y, float* I, float* Q, const int W);
    inline  void convert_row_to_RGB (float* r, float* g, float* b, const float* const Y, const float* const I, const float* const Q, const int W);
+    inline  void convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q); // scalar form: one Y/I/Q triple in, one r/g/b triple out (by reference)

    inline  void convert_to_cielab_row  (float* ar, float* ag, float* ab, float* oL, float* oa, float* ob);
    inline  void interpolate_row_g      (float* agh, float* agv, int i);
--- a/rtengine/rawimagesource_i.h
+++ b/rtengine/rawimagesource_i.h
@@ -51,6 +51,13 @@ inline void RawImageSource::convert_row_to_RGB (float* r, float* g, float* b, co
    }
 }

+inline void RawImageSource::convert_to_RGB (float &r, float &g, float &b, const float &Y, const float &I, const float &Q) // scalar conversion: overwrites r, g, b from a single Y, I, Q triple
+{
+    r = Y + 0.956f * I + 0.621f * Q; // each channel is Y plus a fixed weighted sum of I and Q
+    g = Y - 0.272f * I - 0.647f * Q;
+    b = Y - 1.105f * I + 1.702f * Q; // NOTE(review): weights presumably mirror convert_row_to_RGB (body not visible here) - confirm
+}
+
 inline void RawImageSource::convert_to_cielab_row (float* ar, float* ag, float* ab, float* oL, float* oa, float* ob)
 {