Speedup for transform

2019-08-27 13:25:34 +02:00
parent 061bf713c8
commit 9a624ca01e
2 changed files with 86 additions and 143 deletions
--- a/rtengine/improcfun.h
+++ b/rtengine/improcfun.h
@@ -77,116 +77,6 @@ class ImProcFunctions
    bool needsLensfun();
 //   static cmsUInt8Number* Mempro = NULL;

-    inline void interpolateTransformCubic(Imagefloat* src, int xs, int ys, double Dx, double Dy, float *r, float *g, float *b, double mul)
-    {
-        const double A = -0.85;
-
-        double w[4];
-
-        {
-            double t1, t2;
-            t1 = -A * (Dx - 1.0) * Dx;
-            t2 = (3.0 - 2.0 * Dx) * Dx * Dx;
-            w[3] = t1 * Dx;
-            w[2] = t1 * (Dx - 1.0) + t2;
-            w[1] = -t1 * Dx + 1.0 - t2;
-            w[0] = -t1 * (Dx - 1.0);
-        }
-
-        double rd, gd, bd;
-        double yr[4] = {0.0}, yg[4] = {0.0}, yb[4] = {0.0};
-
-        for (int k = ys, kx = 0; k < ys + 4; k++, kx++) {
-            rd = gd = bd = 0.0;
-
-            for (int i = xs, ix = 0; i < xs + 4; i++, ix++) {
-                rd += src->r(k, i) * w[ix];
-                gd += src->g(k, i) * w[ix];
-                bd += src->b(k, i) * w[ix];
-            }
-
-            yr[kx] = rd;
-            yg[kx] = gd;
-            yb[kx] = bd;
-        }
-
-
-        {
-            double t1, t2;
-
-            t1 = -A * (Dy - 1.0) * Dy;
-            t2 = (3.0 - 2.0 * Dy) * Dy * Dy;
-            w[3] = t1 * Dy;
-            w[2] = t1 * (Dy - 1.0) + t2;
-            w[1] = -t1 * Dy + 1.0 - t2;
-            w[0] = -t1 * (Dy - 1.0);
-        }
-
-        rd = gd = bd = 0.0;
-
-        for (int i = 0; i < 4; i++) {
-            rd += yr[i] * w[i];
-            gd += yg[i] * w[i];
-            bd += yb[i] * w[i];
-        }
-
-        *r = rd * mul;
-        *g = gd * mul;
-        *b = bd * mul;
-
-        //  if (xs==100 && ys==100)
-        //    printf ("r=%g, g=%g\n", *r, *g);
-    }
-
-    inline void interpolateTransformChannelsCubic(float** src, int xs, int ys, double Dx, double Dy, float *r, double mul)
-    {
-        const double A = -0.85;
-
-        double w[4];
-
-        {
-            double t1, t2;
-            t1 = -A * (Dx - 1.0) * Dx;
-            t2 = (3.0 - 2.0 * Dx) * Dx * Dx;
-            w[3] = t1 * Dx;
-            w[2] = t1 * (Dx - 1.0) + t2;
-            w[1] = -t1 * Dx + 1.0 - t2;
-            w[0] = -t1 * (Dx - 1.0);
-        }
-
-        double rd;
-        double yr[4] = {0.0};
-
-        for (int k = ys, kx = 0; k < ys + 4; k++, kx++) {
-            rd = 0.0;
-
-            for (int i = xs, ix = 0; i < xs + 4; i++, ix++) {
-                rd += src[k][i] * w[ix];
-            }
-
-            yr[kx] = rd;
-        }
-
-
-        {
-            double t1, t2;
-            t1 = -A * (Dy - 1.0) * Dy;
-            t2 = (3.0 - 2.0 * Dy) * Dy * Dy;
-            w[3] = t1 * Dy;
-            w[2] = t1 * (Dy - 1.0) + t2;
-            w[1] = -t1 * Dy + 1.0 - t2;
-            w[0] = -t1 * (Dy - 1.0);
-        }
-
-        rd = 0.0;
-
-        for (int i = 0; i < 4; i++) {
-            rd += yr[i] * w[i];
-        }
-
-        *r = rd * mul;
-    }
-

 public:
    enum class Median {
--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@@ -26,7 +26,8 @@
 #include "rt_math.h"
 #include "sleef.c"
 #include "rtlensfun.h"
-
+#define BENCHMARK
+#include "StopWatch.h"

 using namespace std;

@@ -87,6 +88,66 @@ float normn (float a, float b, int n)
    }
 }

+inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const float w3Vert = t1Vert * Dy;
+    const float w2Vert = t1Vert * Dy - t1Vert + t2Vert;
+    const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert;
+    const float w0Vert = t1Vert - (t1Vert * Dy);
+
+    float rv[4], gv[4], bv[4];
+    for (int i = 0; i < 4; ++i) {
+        rv[i] = w0Vert * src->r(ys, xs + i) + w1Vert * src->r(ys + 1, xs + i) + w2Vert * src->r(ys + 2, xs + i) + w3Vert * src->r(ys + 3, xs + i);
+        gv[i] = w0Vert * src->g(ys, xs + i) + w1Vert * src->g(ys + 1, xs + i) + w2Vert * src->g(ys + 2, xs + i) + w3Vert * src->g(ys + 3, xs + i);
+        bv[i] = w0Vert * src->b(ys, xs + i) + w1Vert * src->b(ys + 1, xs + i) + w2Vert * src->b(ys + 2, xs + i) + w3Vert * src->b(ys + 3, xs + i);
+    }
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const float w3Hor = t1Hor * Dx;
+    const float w2Hor = t1Hor * Dx - t1Hor + t2Hor;
+    const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor;
+    const float w0Hor = t1Hor - (t1Hor * Dx);
+
+    r = mul * (rv[0] * w0Hor + rv[1] * w1Hor + rv[2] * w2Hor + rv[3] * w3Hor);
+    g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor);
+    b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor);
+}
+
+inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul)
+{
+    constexpr float A = -0.85f;
+
+    // Vertical
+    const float t1Vert = A * (Dy - Dy * Dy);
+    const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
+    const float w3Vert = t1Vert * Dy;
+    const float w2Vert = t1Vert * Dy - t1Vert + t2Vert;
+    const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert;
+    const float w0Vert = t1Vert - (t1Vert * Dy);
+
+    float cv[4];
+    for (int i = 0; i < 4; ++i) {
+        cv[i] = w0Vert * src[ys][xs + i] + w1Vert * src[ys + 1][xs + i] + w2Vert * src[ys + 2][xs + i] + w3Vert * src[ys + 3][xs + i];
+    }
+
+    // Horizontal
+    const float t1Hor = A * (Dx - Dx * Dx);
+    const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
+    const float w3Hor = t1Hor * Dx;
+    const float w2Hor = t1Hor * Dx - t1Hor + t2Hor;
+    const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor;
+    const float w0Hor = t1Hor - (t1Hor * Dx);
+
+    dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor);
+}
+

 }

@@ -740,17 +801,18 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat*

 void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap)
 {
+    BENCHFUN
    // set up stuff, depending on the mode we are
-    bool enableLCPDist = pLCPMap && params->lensProf.useDist;
-    bool enableCA = highQuality && needsCA();
-    bool enableGradient = needsGradient();
-    bool enablePCVignetting = needsPCVignetting();
-    bool enableVignetting = needsVignetting();
-    bool enablePerspective = needsPerspective();
-    bool enableDistortion = needsDistortion();
+    const bool enableLCPDist = pLCPMap && params->lensProf.useDist;
+    const bool enableCA = highQuality && needsCA();
+    const bool enableGradient = needsGradient();
+    const bool enablePCVignetting = needsPCVignetting();
+    const bool enableVignetting = needsVignetting();
+    const bool enablePerspective = needsPerspective();
+    const bool enableDistortion = needsDistortion();

-    double w2 = (double) oW  / 2.0 - 0.5;
-    double h2 = (double) oH  / 2.0 - 0.5;
+    const double w2 = (double) oW  / 2.0 - 0.5;
+    const double h2 = (double) oH  / 2.0 - 0.5;

    double vig_w2, vig_h2, maxRadius, v, b, mul;
    calcVignettingParams (oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul);
@@ -784,11 +846,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
    chDist[2] = enableCA ? params->cacorrection.blue : 0.0;

    // auxiliary variables for distortion correction
-    double distAmount = params->distortion.amount;
+    const double distAmount = params->distortion.amount;

    // auxiliary variables for rotation
-    double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0);
-    double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0);
+    const double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0);
+    const double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0);

    // auxiliary variables for vertical perspective correction
    double vpdeg = params->perspective.vertical / 100.0 * 45.0;
@@ -814,9 +876,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 #pragma GCC diagnostic pop
 #endif
    // main cycle
-    bool darkening = (params->vignetting.amount <= 0.0);
+    const bool darkening = (params->vignetting.amount <= 0.0);
+
 #ifdef _OPENMP
-    #pragma omp parallel for if (multiThread)
+    #pragma omp parallel for schedule(dynamic, 16) if(multiThread)
 #endif

    for (int y = 0; y < transformed->getHeight(); y++) {
@@ -833,13 +896,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
            x_d += ascale * (cx - w2);     // centering x coord & scale
            y_d += ascale * (cy - h2);     // centering y coord & scale

-            double vig_x_d = 0., vig_y_d = 0.;
-
-            if (enableVignetting) {
-                vig_x_d = ascale * (x + cx - vig_w2);       // centering x coord & scale
-                vig_y_d = ascale * (y + cy - vig_h2);       // centering y coord & scale
-            }
-
            if (enablePerspective) {
                // horizontal perspective transformation
                y_d *= maxRadius / (maxRadius + x_d * hptanpt);
@@ -862,14 +918,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
                s = 1.0 - distAmount + distAmount * r ;
            }

-            double r2 = 0.;
-
-            if (enableVignetting) {
-                double vig_Dx = vig_x_d * cost - vig_y_d * sint;
-                double vig_Dy = vig_x_d * sint + vig_y_d * cost;
-                r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy);
-            }
-
            for (int c = 0; c < (enableCA ? 3 : 1); c++) {
                double Dx = Dxc * (s + chDist[c]);
                double Dy = Dyc * (s + chDist[c]);
@@ -893,6 +941,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
                    double vignmul = 1.0;

                    if (enableVignetting) {
+                        const double vig_x_d = ascale * (x + cx - vig_w2);       // centering x coord & scale
+                        const double vig_y_d = ascale * (y + cy - vig_h2);       // centering y coord & scale
+                        const double vig_Dx = vig_x_d * cost - vig_y_d * sint;
+                        const double vig_Dy = vig_x_d * sint + vig_y_d * cost;
+                        const double r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy);
                        if (darkening) {
                            vignmul /= std::max (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius), 0.001);
                        } else {
@@ -911,13 +964,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
                    if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) {
                        // all interpolation pixels inside image
                        if (enableCA) {
-                            interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), vignmul);
+                            interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul);
                        } else if (!highQuality) {
                            transformed->r (y, x) = vignmul * (original->r (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r (yc, xc + 1) * Dx * (1.0 - Dy) + original->r (yc + 1, xc) * (1.0 - Dx) * Dy + original->r (yc + 1, xc + 1) * Dx * Dy);
                            transformed->g (y, x) = vignmul * (original->g (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g (yc, xc + 1) * Dx * (1.0 - Dy) + original->g (yc + 1, xc) * (1.0 - Dx) * Dy + original->g (yc + 1, xc + 1) * Dx * Dy);
                            transformed->b (y, x) = vignmul * (original->b (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b (yc, xc + 1) * Dx * (1.0 - Dy) + original->b (yc + 1, xc) * (1.0 - Dx) * Dy + original->b (yc + 1, xc + 1) * Dx * Dy);
                        } else {
-                            interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, & (transformed->r (y, x)), & (transformed->g (y, x)), & (transformed->b (y, x)), vignmul);
+                            interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, transformed->r (y, x), transformed->g (y, x), transformed->b (y, x), vignmul);
                        }
                    } else {
                        // edge pixels
@@ -988,7 +1041,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
                    // multiplier for vignetting correction
                    if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) {
                        // all interpolation pixels inside image
-                        interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), 1.0);
+                        interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0);
                    } else {
                        // edge pixels
                        int y1 = LIM (yc,   0, original->getHeight() - 1);