SSE code for interpolateTransformCubic and interpolateTransformChannelsCubic, also some cleanups
This commit is contained in:
@@ -88,9 +88,34 @@ float normn (float a, float b, int n)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __SSE2__
|
||||||
|
// SSE2 variant of the cubic interpolation for the r, g and b planes of src.
// Reads rows ys .. ys + 3 at column xs, blends them vertically with weights
// derived from Dy, then combines the lanes horizontally with weights derived
// from Dx. The results, scaled by mul, are written to r, g and b.
inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
{
    constexpr float A = -0.85f;

    // Vertical weights, one per source row, each wrapped as a vfloat via F2V
    const float vertT1 = A * (Dy - Dy * Dy);
    const float vertT2 = (3.f - 2.f * Dy) * Dy * Dy;
    const vfloat rowWeight0 = F2V(vertT1 - (vertT1 * Dy));
    const vfloat rowWeight1 = F2V(1.f - (vertT1 * Dy) - vertT2);
    const vfloat rowWeight2 = F2V(vertT1 * Dy - vertT1 + vertT2);
    const vfloat rowWeight3 = F2V(vertT1 * Dy);

    // Vertical pass: rows are summed pairwise, (0 + 1) + (2 + 3)
    const vfloat redColumns =
        (rowWeight0 * LVFU(src->r(ys, xs)) + rowWeight1 * LVFU(src->r(ys + 1, xs)))
        + (rowWeight2 * LVFU(src->r(ys + 2, xs)) + rowWeight3 * LVFU(src->r(ys + 3, xs)));
    const vfloat greenColumns =
        (rowWeight0 * LVFU(src->g(ys, xs)) + rowWeight1 * LVFU(src->g(ys + 1, xs)))
        + (rowWeight2 * LVFU(src->g(ys + 2, xs)) + rowWeight3 * LVFU(src->g(ys + 3, xs)));
    const vfloat blueColumns =
        (rowWeight0 * LVFU(src->b(ys, xs)) + rowWeight1 * LVFU(src->b(ys + 1, xs)))
        + (rowWeight2 * LVFU(src->b(ys + 2, xs)) + rowWeight3 * LVFU(src->b(ys + 3, xs)));

    // Horizontal weights with mul folded in. _mm_set_ps lists its arguments
    // from the highest lane down, so the last argument ends up in lane 0.
    // NOTE(review): assumes LVFU loads consecutive floats starting at the
    // given element, so that lane 0 corresponds to column xs - confirm.
    const float horT1 = A * (Dx - Dx * Dx);
    const float horT2 = (3.f - 2.f * Dx) * Dx * Dx;
    const vfloat scaledColumnWeights =
        _mm_set_ps(horT1 * Dx, horT1 * Dx - horT1 + horT2, 1.f - (horT1 * Dx) - horT2, horT1 - (horT1 * Dx)) * F2V(mul);

    // Horizontal pass: weight each lane and reduce with vhadd
    r = vhadd(scaledColumnWeights * redColumns);
    g = vhadd(scaledColumnWeights * greenColumns);
    b = vhadd(scaledColumnWeights * blueColumns);
}
|
||||||
|
#else
|
||||||
inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
|
inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys, float Dx, float Dy, float &r, float &g, float &b, float mul)
|
||||||
{
|
{
|
||||||
// I tried hand written SSE code but gcc vectorizes better
|
|
||||||
constexpr float A = -0.85f;
|
constexpr float A = -0.85f;
|
||||||
|
|
||||||
// Vertical
|
// Vertical
|
||||||
@@ -120,10 +145,31 @@ inline void interpolateTransformCubic(rtengine::Imagefloat* src, int xs, int ys,
|
|||||||
g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor);
|
g = mul * (gv[0] * w0Hor + gv[1] * w1Hor + gv[2] * w2Hor + gv[3] * w3Hor);
|
||||||
b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor);
|
b = mul * (bv[0] * w0Hor + bv[1] * w1Hor + bv[2] * w2Hor + bv[3] * w3Hor);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE2__
|
||||||
|
inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul)
|
||||||
|
{
|
||||||
|
constexpr float A = -0.85f;
|
||||||
|
|
||||||
|
// Vertical
|
||||||
|
const float t1Vert = A * (Dy - Dy * Dy);
|
||||||
|
const float t2Vert = (3.f - 2.f * Dy) * Dy * Dy;
|
||||||
|
const vfloat w3Vert = F2V(t1Vert * Dy);
|
||||||
|
const vfloat w2Vert = F2V(t1Vert * Dy - t1Vert + t2Vert);
|
||||||
|
const vfloat w1Vert = F2V(1.f - (t1Vert * Dy) - t2Vert);
|
||||||
|
const vfloat w0Vert = F2V(t1Vert - (t1Vert * Dy));
|
||||||
|
|
||||||
|
const vfloat cv = (w0Vert * LVFU(src[ys][xs]) + w1Vert * LVFU(src[ys + 1][xs])) + (w2Vert * LVFU(src[ys + 2][xs]) + w3Vert * LVFU(src[ys + 3][xs]));
|
||||||
|
|
||||||
|
// Horizontal
|
||||||
|
const float t1Hor = A * (Dx - Dx * Dx);
|
||||||
|
const float t2Hor = (3.f - 2.f * Dx) * Dx * Dx;
|
||||||
|
const vfloat weight = _mm_set_ps(t1Hor * Dx, t1Hor * Dx - t1Hor + t2Hor, 1.f - (t1Hor * Dx) - t2Hor, t1Hor - (t1Hor * Dx));
|
||||||
|
dest = mul * vhadd(weight * cv);
|
||||||
|
}
|
||||||
|
#else
|
||||||
inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul)
|
inline void interpolateTransformChannelsCubic(const float* const * src, int xs, int ys, float Dx, float Dy, float &dest, float mul)
|
||||||
{
|
{
|
||||||
// I tried hand written SSE code but gcc vectorizes better
|
|
||||||
constexpr float A = -0.85f;
|
constexpr float A = -0.85f;
|
||||||
|
|
||||||
// Vertical
|
// Vertical
|
||||||
@@ -149,7 +195,7 @@ inline void interpolateTransformChannelsCubic(const float* const * src, int xs,
|
|||||||
|
|
||||||
dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor);
|
dest = mul * (cv[0] * w0Hor + cv[1] * w1Hor + cv[2] * w2Hor + cv[3] * w3Hor);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -831,21 +877,11 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
|
|||||||
calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv);
|
calcPCVignetteParams(fW, fH, oW, oH, params->pcvignette, params->crop, pcv);
|
||||||
}
|
}
|
||||||
|
|
||||||
float** chOrig[3];
|
float** chOrig[3] = {original->r.ptrs, original->g.ptrs, original->b.ptrs};
|
||||||
chOrig[0] = original->r.ptrs;
|
float** chTrans[3] = {transformed->r.ptrs, transformed->g.ptrs, transformed->b.ptrs};
|
||||||
chOrig[1] = original->g.ptrs;
|
|
||||||
chOrig[2] = original->b.ptrs;
|
|
||||||
|
|
||||||
float** chTrans[3];
|
|
||||||
chTrans[0] = transformed->r.ptrs;
|
|
||||||
chTrans[1] = transformed->g.ptrs;
|
|
||||||
chTrans[2] = transformed->b.ptrs;
|
|
||||||
|
|
||||||
// auxiliary variables for c/a correction
|
// auxiliary variables for c/a correction
|
||||||
double chDist[3];
|
const double chDist[3] = {enableCA ? params->cacorrection.red : 0.0, 0.0, enableCA ? params->cacorrection.blue : 0.0};
|
||||||
chDist[0] = enableCA ? params->cacorrection.red : 0.0;
|
|
||||||
chDist[1] = 0.0;
|
|
||||||
chDist[2] = enableCA ? params->cacorrection.blue : 0.0;
|
|
||||||
|
|
||||||
// auxiliary variables for distortion correction
|
// auxiliary variables for distortion correction
|
||||||
const double distAmount = params->distortion.amount;
|
const double distAmount = params->distortion.amount;
|
||||||
@@ -855,31 +891,26 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
|
|||||||
const double sint = sin(params->rotate.degree * rtengine::RT_PI / 180.0);
|
const double sint = sin(params->rotate.degree * rtengine::RT_PI / 180.0);
|
||||||
|
|
||||||
// auxiliary variables for vertical perspective correction
|
// auxiliary variables for vertical perspective correction
|
||||||
double vpdeg = params->perspective.vertical / 100.0 * 45.0;
|
const double vpdeg = params->perspective.vertical / 100.0 * 45.0;
|
||||||
double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI;
|
const double vpalpha = (90.0 - vpdeg) / 180.0 * rtengine::RT_PI;
|
||||||
double vpteta = fabs (vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((vpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oW * tan (vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) *
|
const double vpteta = fabs(vpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((vpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oW * tan(vpalpha)) + (vpdeg > 0 ? 1.0 : -1.0) *
|
||||||
oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8)));
|
oW * tan(vpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oW * tan(vpalpha)))) / (SQR(maxRadius) * 8)));
|
||||||
double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos (vpteta), vptanpt = tan (vpteta);
|
const double vpcospt = (vpdeg >= 0 ? 1.0 : -1.0) * cos(vpteta), vptanpt = tan(vpteta);
|
||||||
|
|
||||||
// auxiliary variables for horizontal perspective correction
|
// auxiliary variables for horizontal perspective correction
|
||||||
double hpdeg = params->perspective.horizontal / 100.0 * 45.0;
|
const double hpdeg = params->perspective.horizontal / 100.0 * 45.0;
|
||||||
double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI;
|
const double hpalpha = (90.0 - hpdeg) / 180.0 * rtengine::RT_PI;
|
||||||
double hpteta = fabs (hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos ((hpdeg > 0 ? 1.0 : -1.0) * sqrt ((-SQR (oH * tan (hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) *
|
const double hpteta = fabs(hpalpha - rtengine::RT_PI / 2) < 3e-4 ? 0.0 : acos((hpdeg > 0 ? 1.0 : -1.0) * sqrt((-SQR(oH * tan(hpalpha)) + (hpdeg > 0 ? 1.0 : -1.0) *
|
||||||
oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8)));
|
oH * tan(hpalpha) * sqrt(SQR(4 * maxRadius) + SQR(oH * tan(hpalpha)))) / (SQR(maxRadius) * 8)));
|
||||||
double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos (hpteta), hptanpt = tan (hpteta);
|
const double hpcospt = (hpdeg >= 0 ? 1.0 : -1.0) * cos(hpteta), hptanpt = tan(hpteta);
|
||||||
|
|
||||||
double ascale = params->commonTrans.autofill ? getTransformAutoFill (oW, oH, pLCPMap) : 1.0;
|
const double ascale = params->commonTrans.autofill ? getTransformAutoFill(oW, oH, pLCPMap) : 1.0;
|
||||||
|
|
||||||
#if defined( __GNUC__ ) && __GNUC__ >= 7// silence warning
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
|
|
||||||
#endif
|
|
||||||
#if defined( __GNUC__ ) && __GNUC__ >= 7
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
#endif
|
|
||||||
// main cycle
|
|
||||||
const bool darkening = (params->vignetting.amount <= 0.0);
|
const bool darkening = (params->vignetting.amount <= 0.0);
|
||||||
|
const double centerFactorx = cx - w2;
|
||||||
|
const double centerFactory = cy - h2;
|
||||||
|
|
||||||
|
// main cycle
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic, 16) if(multiThread)
|
#pragma omp parallel for schedule(dynamic, 16) if(multiThread)
|
||||||
#endif
|
#endif
|
||||||
@@ -895,8 +926,8 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
|
|||||||
y_d *= ascale;
|
y_d *= ascale;
|
||||||
}
|
}
|
||||||
|
|
||||||
x_d += ascale * (cx - w2); // centering x coord & scale
|
x_d += ascale * centerFactorx; // centering x coord & scale
|
||||||
y_d += ascale * (cy - h2); // centering y coord & scale
|
y_d += ascale * centerFactory; // centering y coord & scale
|
||||||
|
|
||||||
if (enablePerspective) {
|
if (enablePerspective) {
|
||||||
// horizontal perspective transformation
|
// horizontal perspective transformation
|
||||||
@@ -909,14 +940,14 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
|
|||||||
}
|
}
|
||||||
|
|
||||||
// rotate
|
// rotate
|
||||||
double Dxc = x_d * cost - y_d * sint;
|
const double Dxc = x_d * cost - y_d * sint;
|
||||||
double Dyc = x_d * sint + y_d * cost;
|
const double Dyc = x_d * sint + y_d * cost;
|
||||||
|
|
||||||
// distortion correction
|
// distortion correction
|
||||||
double s = 1;
|
double s = 1;
|
||||||
|
|
||||||
if (enableDistortion) {
|
if (enableDistortion) {
|
||||||
double r = sqrt (Dxc * Dxc + Dyc * Dyc) / maxRadius; // sqrt is slow
|
double r = sqrt(Dxc * Dxc + Dyc * Dyc) / maxRadius;
|
||||||
s = 1.0 - distAmount + distAmount * r;
|
s = 1.0 - distAmount + distAmount * r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user