From 9a624ca01e36208a8727f200554ed088f9f618e2 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 13:25:34 +0200 Subject: [PATCH 1/5] Speedup for transform --- rtengine/improcfun.h | 110 ------------------------------------- rtengine/iptransform.cc | 119 +++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 143 deletions(-) diff --git a/rtengine/improcfun.h b/rtengine/improcfun.h index 987a460d7..338df2416 100644 --- a/rtengine/improcfun.h +++ b/rtengine/improcfun.h @@ -77,116 +77,6 @@ class ImProcFunctions bool needsLensfun(); // static cmsUInt8Number* Mempro = NULL; - inline void interpolateTransformCubic(Imagefloat* src, int xs, int ys, double Dx, double Dy, float *r, float *g, float *b, double mul) - { - const double A = -0.85; - - double w[4]; - - { - double t1, t2; - t1 = -A * (Dx - 1.0) * Dx; - t2 = (3.0 - 2.0 * Dx) * Dx * Dx; - w[3] = t1 * Dx; - w[2] = t1 * (Dx - 1.0) + t2; - w[1] = -t1 * Dx + 1.0 - t2; - w[0] = -t1 * (Dx - 1.0); - } - - double rd, gd, bd; - double yr[4] = {0.0}, yg[4] = {0.0}, yb[4] = {0.0}; - - for (int k = ys, kx = 0; k < ys + 4; k++, kx++) { - rd = gd = bd = 0.0; - - for (int i = xs, ix = 0; i < xs + 4; i++, ix++) { - rd += src->r(k, i) * w[ix]; - gd += src->g(k, i) * w[ix]; - bd += src->b(k, i) * w[ix]; - } - - yr[kx] = rd; - yg[kx] = gd; - yb[kx] = bd; - } - - - { - double t1, t2; - - t1 = -A * (Dy - 1.0) * Dy; - t2 = (3.0 - 2.0 * Dy) * Dy * Dy; - w[3] = t1 * Dy; - w[2] = t1 * (Dy - 1.0) + t2; - w[1] = -t1 * Dy + 1.0 - t2; - w[0] = -t1 * (Dy - 1.0); - } - - rd = gd = bd = 0.0; - - for (int i = 0; i < 4; i++) { - rd += yr[i] * w[i]; - gd += yg[i] * w[i]; - bd += yb[i] * w[i]; - } - - *r = rd * mul; - *g = gd * mul; - *b = bd * mul; - - // if (xs==100 && ys==100) - // printf ("r=%g, g=%g\n", *r, *g); - } - - inline void interpolateTransformChannelsCubic(float** src, int xs, int ys, double Dx, double Dy, float *r, double mul) - { - const double A = -0.85; - - double w[4]; - - { - double t1, t2; - t1 = -A * (Dx - 1.0) * Dx; - t2 = (3.0 - 2.0 * Dx) * Dx * Dx; - w[3] = t1 * Dx; - w[2] = t1 * (Dx - 1.0) + t2; - w[1] = -t1 * Dx + 1.0 - t2; - w[0] = -t1 * (Dx - 1.0); - } - - double rd; - double yr[4] = {0.0}; - - for (int k = ys, kx = 0; k < ys + 4; k++, kx++) { - rd = 0.0; - - for (int i = xs, ix = 0; i < xs + 4; i++, ix++) { - rd += src[k][i] * w[ix]; - } - - yr[kx] = rd; - } - - - { - double t1, t2; - t1 = -A * (Dy - 1.0) * Dy; - t2 = (3.0 - 2.0 * Dy) * Dy * Dy; - w[3] = t1 * Dy; - w[2] = t1 * (Dy - 1.0) + t2; - w[1] = -t1 * Dy + 1.0 - t2; - w[0] = -t1 * (Dy - 1.0); - } - - rd = 0.0; - - for (int i = 0; i < 4; i++) { - rd += yr[i] * w[i]; - } - - *r = rd * mul; - } - public: enum class Median { diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 5b1e7c458..1d7ac9d56 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -26,7 +26,8 @@ #include "rt_math.h" #include "sleef.c" #include "rtlensfun.h" - +#define BENCHMARK +#include "StopWatch.h" using namespace std; @@ -87,6 +88,66 @@ float normn (float a, float b, int n) } } +inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float rv[4], gv[4], bv[4]; + for (int i = 0; i < 4; ++i) { + rv[i] = w0Vert * src->r(ys, xs + i) + w1Vert * src->r(ys + 1, xs + i) + w2Vert * src->r(ys + 2, xs + i) + w3Vert * src->r(ys + 3, xs + i); + gv[i] = w0Vert * src->g(ys, xs + i) + w1Vert * src->g(ys + 1, xs + i) + w2Vert * src->g(ys + 2, xs + i) + w3Vert * src->g(ys + 3, xs + i); + bv[i] = w0Vert * src->b(ys, xs + i) + w1Vert * src->b(ys + 1, xs + i) + w2Vert * src->b(ys + 2, xs + i) + w3Vert * src->b(ys + 3, xs + i); + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + r = mul * (rv[0] * w0Hor + rv[1] * w1Hor + rv[2] * w2Hor + rv[3] * w3Hor); + g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); + b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); +} + +inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const float w3Vert = t1Vert * Dy; + const float w2Vert = t1Vert * Dy - t1Vert + t2Vert; + const float w1Vert = 1.f - (t1Vert * Dy) - t2Vert; + const float w0Vert = t1Vert - (t1Vert * Dy); + + float cv[4]; + for (int i = 0; i < 4; ++i) { + cv[i] = w0Vert * src[ys][xs + i] + w1Vert * src[ys + 1][xs + i] + w2Vert * src[ys + 2][xs + i] + w3Vert * src[ys + 3][xs + i]; + } + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const float w3Hor = t1Hor * Dx; + const float w2Hor = t1Hor * Dx - t1Hor + t2Hor; + const float w1Hor = 1.f - (t1Hor * Dx) - t2Hor; + const float w0Hor = t1Hor - (t1Hor * Dx); + + dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); +} + } @@ -740,17 +801,18 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { + BENCHFUN // set up stuff, depending on the mode we are - bool enableLCPDist = pLCPMap && params->lensProf.useDist; - bool enableCA = highQuality && needsCA(); - bool enableGradient = needsGradient(); - bool enablePCVignetting = needsPCVignetting(); - bool enableVignetting = needsVignetting(); - bool enablePerspective = needsPerspective(); - bool enableDistortion = needsDistortion(); + const bool enableLCPDist = pLCPMap && params->lensProf.useDist; + const bool enableCA = highQuality && needsCA(); + const bool enableGradient = needsGradient(); + const bool enablePCVignetting = needsPCVignetting(); + const bool enableVignetting = needsVignetting(); + const bool enablePerspective = needsPerspective(); + const bool enableDistortion = needsDistortion(); - double w2 = (double) oW / 2.0 - 0.5; - double h2 = (double) oH / 2.0 - 0.5; + const double w2 = (double) oW / 2.0 - 0.5; + const double h2 = (double) oH / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; calcVignettingParams (oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); @@ -784,11 +846,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I chDist[2] = enableCA ? params->cacorrection.blue : 0.0; // auxiliary variables for distortion correction - double distAmount = params->distortion.amount; + const double distAmount = params->distortion.amount; // auxiliary variables for rotation - double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); - double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); + const double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); + const double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); // auxiliary variables for vertical perspective correction double vpdeg = params->perspective.vertical / 100.0 * 45.0; @@ -814,9 +876,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I #pragma GCC diagnostic pop #endif // main cycle - bool darkening = (params->vignetting.amount <= 0.0); + const bool darkening = (params->vignetting.amount <= 0.0); + #ifdef _OPENMP - #pragma omp parallel for if (multiThread) + #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif for (int y = 0; y < transformed->getHeight(); y++) { @@ -833,13 +896,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I x_d += ascale * (cx - w2); // centering x coord & scale y_d += ascale * (cy - h2); // centering y coord & scale - double vig_x_d = 0., vig_y_d = 0.; - - if (enableVignetting) { - vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale - vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale - } - if (enablePerspective) { // horizontal perspective transformation y_d *= maxRadius / (maxRadius + x_d * hptanpt); @@ -862,14 +918,6 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I s = 1.0 - distAmount + distAmount * r ; } - double r2 = 0.; - - if (enableVignetting) { - double vig_Dx = vig_x_d * cost - vig_y_d * sint; - double vig_Dy = vig_x_d * sint + vig_y_d * cost; - r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); - } - for (int c = 0; c < (enableCA ? 3 : 1); c++) { double Dx = Dxc * (s + chDist[c]); double Dy = Dyc * (s + chDist[c]); @@ -893,6 +941,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I double vignmul = 1.0; if (enableVignetting) { + const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale + const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale + const double vig_Dx = vig_x_d * cost - vig_y_d * sint; + const double vig_Dy = vig_x_d * sint + vig_y_d * cost; + const double r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); if (darkening) { vignmul /= std::max (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius), 0.001); } else { @@ -911,13 +964,13 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image if (enableCA) { - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), vignmul); + interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); } else if (!highQuality) { transformed->r (y, x) = vignmul * (original->r (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r (yc, xc + 1) * Dx * (1.0 - Dy) + original->r (yc + 1, xc) * (1.0 - Dx) * Dy + original->r (yc + 1, xc + 1) * Dx * Dy); transformed->g (y, x) = vignmul * (original->g (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g (yc, xc + 1) * Dx * (1.0 - Dy) + original->g (yc + 1, xc) * (1.0 - Dx) * Dy + original->g (yc + 1, xc + 1) * Dx * Dy); transformed->b (y, x) = vignmul * (original->b (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b (yc, xc + 1) * Dx * (1.0 - Dy) + original->b (yc + 1, xc) * (1.0 - Dx) * Dy + original->b (yc + 1, xc + 1) * Dx * Dy); } else { - interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, & (transformed->r (y, x)), & (transformed->g (y, x)), & (transformed->b (y, x)), vignmul); + interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, transformed->r (y, x), transformed->g (y, x), transformed->b (y, x), vignmul); } } else { // edge pixels @@ -988,7 +1041,7 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans // multiplier for vignetting correction if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, & (chTrans[c][y][x]), 1.0); + interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], 1.0); } else { // edge pixels int y1 = LIM (yc, 0, original->getHeight() - 1); From 5a5952dddbbeb5ba901950e43592aea338d22e60 Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 13:36:52 +0200 Subject: [PATCH 2/5] added two comments --- rtengine/iptransform.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 1d7ac9d56..9484599d6 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -90,6 +90,7 @@ float normn (float a, float b, int n) inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { + // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -122,6 +123,7 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) { + // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical From 4312e682657590ef8c30f4056baa49613bdd16ed Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Tue, 27 Aug 2019 19:59:10 +0200 Subject: [PATCH 3/5] SSE code for interpolateTransformCubic and interpolateTransformChannelsCubic, also some cleanups --- rtengine/iptransform.cc | 165 ++++++++++++++++++++++++---------------- 1 file changed, 98 insertions(+), 67 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 9484599d6..608fa0337 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -88,9 +88,34 @@ float normn (float a, float b, int n) } } +#ifdef __SSE2__ +inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat rv = (w0Vert * LVFU(src->r(ys, xs)) + w1Vert * LVFU(src->r(ys + 1, xs))) + (w2Vert * LVFU(src->r(ys + 2, xs)) + w3Vert * LVFU(src->r(ys + 3, xs))); + const vfloat gv = (w0Vert * LVFU(src->g(ys, xs)) + w1Vert * LVFU(src->g(ys + 1, xs))) + (w2Vert * LVFU(src->g(ys + 2, xs)) + w3Vert * LVFU(src->g(ys + 3, xs))); + const vfloat bv = (w0Vert * LVFU(src->b(ys, xs)) + w1Vert * LVFU(src->b(ys + 1, xs))) + (w2Vert * LVFU(src->b(ys + 2, xs)) + w3Vert * LVFU(src->b(ys + 3, xs))); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)) * F2V(mul); + r = vhadd(weight * rv); + g = vhadd(weight * gv); + b = vhadd(weight * bv); +} +#else inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul) { - // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -120,10 +145,31 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor); b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor); } - +#endif +#ifdef __SSE2__ +inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +{ + constexpr float A = -0.85f; + + // Vertical + const float t1Vert = A * (Dy - Dy * Dy); + const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy; + const vfloat w3Vert = F2V(t1Vert * Dy); + const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert); + const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert); + const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy)); + + const vfloat cv = (w0Vert * LVFU(src[ys][xs]) + w1Vert * LVFU(src[ys + 1][xs])) + (w2Vert * LVFU(src[ys + 2][xs]) + w3Vert * LVFU(src[ys + 3][xs])); + + // Horizontal + const float t1Hor = A * (Dx - Dx * Dx); + const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx; + const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx)); + dest = mul * vhadd(weight * cv); +} +#else inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) { - // I tried hand written SSE code but gcc vectorizes better constexpr float A = -0.85f; // Vertical @@ -149,7 +195,7 @@ inline void interpolateTransformChannelsCubic(const float* const * src, int xs, dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor); } - +#endif } @@ -817,69 +863,54 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double h2 = (double) oH / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; - calcVignettingParams (oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); + calcVignettingParams(oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); struct grad_params gp; if (enableGradient) { - calcGradientParams (oW, oH, params->gradient, gp); + calcGradientParams(oW, oH, params->gradient, gp); } struct pcv_params pcv; if (enablePCVignetting) { - calcPCVignetteParams (fW, fH, oW, oH, params->pcvignette, params->crop, pcv); + calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv); } - float** chOrig[3]; - chOrig[0] = original->r.ptrs; - chOrig[1] = original->g.ptrs; - chOrig[2] = original->b.ptrs; - - float** chTrans[3]; - chTrans[0] = transformed->r.ptrs; - chTrans[1] = transformed->g.ptrs; - chTrans[2] = transformed->b.ptrs; + float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs}; + float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs}; // auxiliary variables for c/a correction - double chDist[3]; - chDist[0] = enableCA ? params->cacorrection.red : 0.0; - chDist[1] = 0.0; - chDist[2] = enableCA ? params->cacorrection.blue : 0.0; + const double chDist[3] = {enableCA ? params->cacorrection.red : 0.0, 0.0, enableCA ? params->cacorrection.blue : 0.0}; // auxiliary variables for distortion correction const double distAmount = params->distortion.amount; // auxiliary variables for rotation - const double cost = cos (params->rotate.degree * rtengine::RT_PI / 180.0); - const double sint = sin (params->rotate.degree * rtengine::RT_PI / 180.0); + const double cost = cos(params->rotate.degree * rtengine::RT_PI / 180.0); + const double sint = sin(params->rotate.degree * rtengine::RT_PI / 180.0); // auxiliary variables for vertical perspective correction - double vpdeg = params->perspective.vertical / 100.0 * 45.0; - double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; - double vpteta = fabs (vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((vpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oW * tan (vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * - oW * tan (vpalpha) * sqrt (SQR (4 * maxRadius) + SQR (oW * tan (vpalpha)))) / (SQR (maxRadius) * 8))); - double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos (vpteta), vptanpt = tan (vpteta); + const double vpdeg = params->perspective.vertical / 100.0 * 45.0; + const double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; + const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * + oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); + const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta), vptanpt = tan(vpteta); // auxiliary variables for horizontal perspective correction - double hpdeg = params->perspective.horizontal / 100.0 * 45.0; - double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; - double hpteta = fabs (hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((hpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oH * tan (hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * - oH * tan (hpalpha) * sqrt (SQR (4 * maxRadius) + SQR (oH * tan (hpalpha)))) / (SQR (maxRadius) * 8))); - double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos (hpteta), hptanpt = tan (hpteta); + const double hpdeg = params->perspective.horizontal / 100.0 * 45.0; + const double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; + const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * + oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); + const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta), hptanpt = tan(hpteta); - double ascale = params->commonTrans.autofill ? getTransformAutoFill (oW, oH, pLCPMap) : 1.0; + const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0; -#if defined( __GNUC__ ) && __GNUC__ >= 7// silence warning -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" -#endif -#if defined( __GNUC__ ) && __GNUC__ >= 7 -#pragma GCC diagnostic pop -#endif - // main cycle const bool darkening = (params->vignetting.amount <= 0.0); + const double centerFactorx = cx - w2; + const double centerFactory = cy - h2; + // main cycle #ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif @@ -895,8 +926,8 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I y_d *= ascale; } - x_d += ascale * (cx - w2); // centering x coord & scale - y_d += ascale * (cy - h2); // centering y coord & scale + x_d += ascale * centerFactorx; // centering x coord & scale + y_d += ascale * centerFactory; // centering y coord & scale if (enablePerspective) { // horizontal perspective transformation @@ -909,15 +940,15 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I } // rotate - double Dxc = x_d * cost - y_d * sint; - double Dyc = x_d * sint + y_d * cost; + const double Dxc = x_d * cost - y_d * sint; + const double Dyc = x_d * sint + y_d * cost; // distortion correction double s = 1; if (enableDistortion) { - double r = sqrt (Dxc * Dxc + Dyc * Dyc) / maxRadius; // sqrt is slow - s = 1.0 - distAmount + distAmount * r ; + double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; + s = 1.0 - distAmount + distAmount * r; } for (int c = 0; c < (enableCA ? 3 : 1); c++) { @@ -947,46 +978,46 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale const double vig_Dx = vig_x_d * cost - vig_y_d * sint; const double vig_Dy = vig_x_d * sint + vig_y_d * cost; - const double r2 = sqrt (vig_Dx * vig_Dx + vig_Dy * vig_Dy); + const double r2 = sqrt(vig_Dx * vig_Dx + vig_Dy * vig_Dy); if (darkening) { - vignmul /= std::max (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius), 0.001); + vignmul /= std::max(v + mul * tanh(b * (maxRadius - s * r2) / maxRadius), 0.001); } else { - vignmul *= (v + mul * tanh (b * (maxRadius - s * r2) / maxRadius)); + vignmul *= (v + mul * tanh(b * (maxRadius - s * r2) / maxRadius)); } } if (enableGradient) { - vignmul *= calcGradientFactor (gp, cx + x, cy + y); + vignmul *= calcGradientFactor(gp, cx + x, cy + y); } if (enablePCVignetting) { - vignmul *= calcPCVignetteFactor (pcv, cx + x, cy + y); + vignmul *= calcPCVignetteFactor(pcv, cx + x, cy + y); } if (yc > 0 && yc < original->getHeight() - 2 && xc > 0 && xc < original->getWidth() - 2) { // all interpolation pixels inside image if (enableCA) { - interpolateTransformChannelsCubic (chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); + interpolateTransformChannelsCubic(chOrig[c], xc - 1, yc - 1, Dx, Dy, chTrans[c][y][x], vignmul); } else if (!highQuality) { - transformed->r (y, x) = vignmul * (original->r (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r (yc, xc + 1) * Dx * (1.0 - Dy) + original->r (yc + 1, xc) * (1.0 - Dx) * Dy + original->r (yc + 1, xc + 1) * Dx * Dy); - transformed->g (y, x) = vignmul * (original->g (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g (yc, xc + 1) * Dx * (1.0 - Dy) + original->g (yc + 1, xc) * (1.0 - Dx) * Dy + original->g (yc + 1, xc + 1) * Dx * Dy); - transformed->b (y, x) = vignmul * (original->b (yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b (yc, xc + 1) * Dx * (1.0 - Dy) + original->b (yc + 1, xc) * (1.0 - Dx) * Dy + original->b (yc + 1, xc + 1) * Dx * Dy); + transformed->r(y, x) = vignmul * (original->r(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->r(yc, xc + 1) * Dx * (1.0 - Dy) + original->r(yc + 1, xc) * (1.0 - Dx) * Dy + original->r(yc + 1, xc + 1) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->g(yc, xc + 1) * Dx * (1.0 - Dy) + original->g(yc + 1, xc) * (1.0 - Dx) * Dy + original->g(yc + 1, xc + 1) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(yc, xc) * (1.0 - Dx) * (1.0 - Dy) + original->b(yc, xc + 1) * Dx * (1.0 - Dy) + original->b(yc + 1, xc) * (1.0 - Dx) * Dy + original->b(yc + 1, xc + 1) * Dx * Dy); } else { - interpolateTransformCubic (original, xc - 1, yc - 1, Dx, Dy, transformed->r (y, x), transformed->g (y, x), transformed->b (y, x), vignmul); + interpolateTransformCubic(original, xc - 1, yc - 1, Dx, Dy, transformed->r(y, x), transformed->g(y, x), transformed->b(y, x), vignmul); } } else { // edge pixels - int y1 = LIM (yc, 0, original->getHeight() - 1); - int y2 = LIM (yc + 1, 0, original->getHeight() - 1); - int x1 = LIM (xc, 0, original->getWidth() - 1); - int x2 = LIM (xc + 1, 0, original->getWidth() - 1); + int y1 = LIM(yc, 0, original->getHeight() - 1); + int y2 = LIM(yc + 1, 0, original->getHeight() - 1); + int x1 = LIM(xc, 0, original->getWidth() - 1); + int x2 = LIM(xc + 1, 0, original->getWidth() - 1); if (enableCA) { chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); } else { - transformed->r (y, x) = vignmul * (original->r (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r (y1, x2) * Dx * (1.0 - Dy) + original->r (y2, x1) * (1.0 - Dx) * Dy + original->r (y2, x2) * Dx * Dy); - transformed->g (y, x) = vignmul * (original->g (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g (y1, x2) * Dx * (1.0 - Dy) + original->g (y2, x1) * (1.0 - Dx) * Dy + original->g (y2, x2) * Dx * Dy); - transformed->b (y, x) = vignmul * (original->b (y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b (y1, x2) * Dx * (1.0 - Dy) + original->b (y2, x1) * (1.0 - Dx) * Dy + original->b (y2, x2) * Dx * Dy); + transformed->r(y, x) = vignmul * (original->r(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->r(y1, x2) * Dx * (1.0 - Dy) + original->r(y2, x1) * (1.0 - Dx) * Dy + original->r(y2, x2) * Dx * Dy); + transformed->g(y, x) = vignmul * (original->g(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->g(y1, x2) * Dx * (1.0 - Dy) + original->g(y2, x1) * (1.0 - Dx) * Dy + original->g(y2, x2) * Dx * Dy); + transformed->b(y, x) = vignmul * (original->b(y1, x1) * (1.0 - Dx) * (1.0 - Dy) + original->b(y1, x2) * Dx * (1.0 - Dy) + original->b(y2, x1) * (1.0 - Dx) * Dy + original->b(y2, x2) * Dx * Dy); } } } else { @@ -994,9 +1025,9 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I // not valid (source pixel x,y not inside source image, etc.) chTrans[c][y][x] = 0; } else { - transformed->r (y, x) = 0; - transformed->g (y, x) = 0; - transformed->b (y, x) = 0; + transformed->r(y, x) = 0; + transformed->g(y, x) = 0; + transformed->b(y, x) = 0; } } } From 9a020899a32447be884d0e4b9b3c744cefbe00f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fl=C3=B6ssie?= Date: Wed, 11 Sep 2019 09:16:06 +0200 Subject: [PATCH 4/5] Minor cleanups --- rtengine/iptransform.cc | 95 +++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 36 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 608fa0337..1e739dac6 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -16,16 +16,21 @@ * You should have received a copy of the GNU General Public License * along with RawTherapee. If not, see . */ -#include "rtengine.h" -#include "improcfun.h" -#include "procparams.h" +#include + #ifdef _OPENMP #include #endif + +#include "improcfun.h" + #include "mytime.h" +#include "procparams.h" #include "rt_math.h" -#include "sleef.c" +#include "rtengine.h" #include "rtlensfun.h" +#include "sleef.c" + #define BENCHMARK #include "StopWatch.h" @@ -147,7 +152,7 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, } #endif #ifdef __SSE2__ -inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) { constexpr float A = -0.85f; @@ -168,7 +173,7 @@ inline void interpolateTransformChannelsCubic(const float* const * src, int xs, dest = mul * vhadd(weight * cv); } #else -inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul) +inline void interpolateTransformChannelsCubic(const float* const* src, int xs, int ys, float Dx, float Dy, float& dest, float mul) { constexpr float A = -0.85f; @@ -859,29 +864,45 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const bool enablePerspective = needsPerspective(); const bool enableDistortion = needsDistortion(); - const double w2 = (double) oW / 2.0 - 0.5; - const double h2 = (double) oH / 2.0 - 0.5; + const double w2 = static_cast(oW) / 2.0 - 0.5; + const double h2 = static_cast(oH) / 2.0 - 0.5; double vig_w2, vig_h2, maxRadius, v, b, mul; calcVignettingParams(oW, oH, params->vignetting, vig_w2, vig_h2, maxRadius, v, b, mul); - struct grad_params gp; + grad_params gp; if (enableGradient) { calcGradientParams(oW, oH, params->gradient, gp); } - struct pcv_params pcv; + pcv_params pcv; if (enablePCVignetting) { calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv); } - float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs}; - float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs}; + const std::array chOrig = { + original->r.ptrs, + original->g.ptrs, + original->b.ptrs + }; + const std::array chTrans = { + transformed->r.ptrs, + transformed->g.ptrs, + transformed->b.ptrs + }; // auxiliary variables for c/a correction - const double chDist[3] = {enableCA ? params->cacorrection.red : 0.0, 0.0, enableCA ? params->cacorrection.blue : 0.0}; + const std::array chDist = { + enableCA + ? params->cacorrection.red + : 0.0, + 0.0, + enableCA + ? params->cacorrection.blue + : 0.0 + }; // auxiliary variables for distortion correction const double distAmount = params->distortion.amount; @@ -893,16 +914,18 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I // auxiliary variables for vertical perspective correction const double vpdeg = params->perspective.vertical / 100.0 * 45.0; const double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI; - const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * - oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); - const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta), vptanpt = tan(vpteta); + const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) * + oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8))); + const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta); + const double vptanpt = tan(vpteta); // auxiliary variables for horizontal perspective correction const double hpdeg = params->perspective.horizontal / 100.0 * 45.0; const double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI; - const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * - oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); - const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta), hptanpt = tan(hpteta); + const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) * + oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8))); + const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta); + const double hptanpt = tan(hpteta); const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0; @@ -915,9 +938,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I #pragma omp parallel for schedule(dynamic, 16) if(multiThread) #endif - for (int y = 0; y < transformed->getHeight(); y++) { - for (int x = 0; x < transformed->getWidth(); x++) { - double x_d = x, y_d = y; + for (int y = 0; y < transformed->getHeight(); ++y) { + for (int x = 0; x < transformed->getWidth(); ++x) { + double x_d = x; + double y_d = y; if (enableLCPDist) { pLCPMap->correctDistortion(x_d, y_d, cx, cy, ascale); // must be first transform @@ -944,14 +968,14 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I const double Dyc = x_d * sint + y_d * cost; // distortion correction - double s = 1; + double s = 1.0; if (enableDistortion) { - double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; + const double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius; s = 1.0 - distAmount + distAmount * r; } - for (int c = 0; c < (enableCA ? 3 : 1); c++) { + for (int c = 0; c < (enableCA ? 3 : 1); ++c) { double Dx = Dxc * (s + chDist[c]); double Dy = Dyc * (s + chDist[c]); @@ -960,22 +984,21 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I Dy += h2; // Extract integer and fractions of source screen coordinates - int xc = (int)Dx; - Dx -= (double)xc; + int xc = Dx; + Dx -= xc; xc -= sx; - int yc = (int)Dy; - Dy -= (double)yc; + int yc = Dy; + Dy -= yc; yc -= sy; // Convert only valid pixels if (yc >= 0 && yc < original->getHeight() && xc >= 0 && xc < original->getWidth()) { - // multiplier for vignetting correction double vignmul = 1.0; if (enableVignetting) { - const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale - const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale + const double vig_x_d = ascale * (x + cx - vig_w2); // centering x coord & scale + const double vig_y_d = ascale * (y + cy - vig_h2); // centering y coord & scale const double vig_Dx = vig_x_d * cost - vig_y_d * sint; const double vig_Dy = vig_x_d * sint + vig_y_d * cost; const double r2 = sqrt(vig_Dx * vig_Dx + vig_Dy * vig_Dy); @@ -1007,10 +1030,10 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I } } else { // edge pixels - int y1 = LIM(yc, 0, original->getHeight() - 1); - int y2 = LIM(yc + 1, 0, original->getHeight() - 1); - int x1 = LIM(xc, 0, original->getWidth() - 1); - int x2 = LIM(xc + 1, 0, original->getWidth() - 1); + const int y1 = LIM(yc, 0, original->getHeight() - 1); + const int y2 = LIM(yc + 1, 0, original->getHeight() - 1); + const int x1 = LIM(xc, 0, original->getWidth() - 1); + const int x2 = LIM(xc + 1, 0, original->getWidth() - 1); if (enableCA) { chTrans[c][y][x] = vignmul * (chOrig[c][y1][x1] * (1.0 - Dx) * (1.0 - Dy) + chOrig[c][y1][x2] * Dx * (1.0 - Dy) + chOrig[c][y2][x1] * (1.0 - Dx) * Dy + chOrig[c][y2][x2] * Dx * Dy); From 796e8f02896c0506dd9c403572adabf80187e03b Mon Sep 17 00:00:00 2001 From: Ingo Weyrich Date: Mon, 16 Sep 2019 17:17:49 +0200 Subject: [PATCH 5/5] Removed timing code --- rtengine/iptransform.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index 1e739dac6..1a38d8e2f 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -31,9 +31,6 @@ #include "rtlensfun.h" #include "sleef.c" -#define BENCHMARK -#include "StopWatch.h" - using namespace std; namespace @@ -854,7 +851,6 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, Imagefloat *transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LensCorrection *pLCPMap) { - BENCHFUN // set up stuff, depending on the mode we are const bool enableLCPDist = pLCPMap && params->lensProf.useDist; const bool enableCA = highQuality && needsCA();