From 87a280f8cafeb8aeaab81292b12a4b7fec61dcc3 Mon Sep 17 00:00:00 2001 From: heckflosse Date: Tue, 21 Feb 2017 19:11:54 +0100 Subject: [PATCH] Additional speedup for lcp vignette correction --- rtengine/lcp.cc | 77 ++++++++++++++++++++++++++------------ rtengine/lcp.h | 9 +---- rtengine/rawimagesource.cc | 31 ++------------- 3 files changed, 58 insertions(+), 59 deletions(-) diff --git a/rtengine/lcp.cc b/rtengine/lcp.cc index 8579efcbd..5dd51b58f 100644 --- a/rtengine/lcp.cc +++ b/rtengine/lcp.cc @@ -37,10 +37,6 @@ LCPModelCommon::LCPModelCommon() imgXCenter = imgYCenter = 0.5; x0 = y0 = fx = fy = rfx = rfy = meanErr = 0; -#if defined( __SSE2__ ) && defined( __x86_64__ ) - x0v = y0v = rfxv = rfyv = ZEROV; -#endif - badErr = false; for (int i = 0; i < 5; i++) { @@ -85,12 +81,6 @@ void LCPModelCommon::merge(const LCPModelCommon& a, const LCPModelCommon& b, flo vignParam[2] = param0Sqr * param[0] - 2. * param[0] * param[1] + param[2]; vignParam[3] = param0Sqr * param0Sqr + param[1] * param[1] + 2. * param[0] * param[2] - 3. * param0Sqr * param[1]; -#if defined( __SSE2__ ) && defined( __x86_64__ ) - vignParamv[0] = F2V(vignParam[0]); - vignParamv[1] = F2V(vignParam[1]); - vignParamv[2] = F2V(vignParam[2]); - vignParamv[3] = F2V(vignParam[3]); -#endif // __SSE2__ } void LCPModelCommon::prepareParams(int fullWidth, int fullHeight, float focalLength, float focalLength35mm, float sensorFormatFactor, bool swapXY, bool mirrorX, bool mirrorY) @@ -126,13 +116,6 @@ void LCPModelCommon::prepareParams(int fullWidth, int fullHeight, float focalLen rfx = 1.0 / fx; rfy = 1.0 / fy; -#if defined( __SSE2__ ) && defined( __x86_64__ ) - x0v = F2V(x0); - y0v = F2V(y0); - rfxv = F2V(rfx); - rfyv = F2V(rfy); -#endif - //printf("FW %i /X0 %g FH %i /Y0 %g %g\n",fullWidth,x0,fullHeight,y0, imgYCenter); } @@ -288,19 +271,65 @@ float LCPMapper::calcVignetteFac(int x, int y) const const float* vignParam = mc.vignParam; float rsqr = xd * xd + yd * yd; - return 1.f + rsqr * (vignParam[0] + rsqr * ((vignParam[1]) - (vignParam[2]) * rsqr + (vignParam[3]) * rsqr * rsqr)); + return rsqr * (vignParam[0] + rsqr * ((vignParam[1]) - (vignParam[2]) * rsqr + (vignParam[3]) * rsqr * rsqr)); } -#if defined( __SSE2__ ) && defined( __x86_64__ ) -vfloat LCPMapper::calcVignetteFac(vfloat x, vfloat y) const +SSEFUNCTION void LCPMapper::processVignetteLine(int width, int y, float *line) const { // No need for swapXY, since vignette is in RAW and always before rotation - vfloat xd = (x - mc.x0v) * mc.rfxv, yd = (y - mc.y0v) * mc.rfyv; - vfloat rsqr = xd * xd + yd * yd; + float yd = ((float)y - mc.y0) * mc.rfy; + yd *= yd; + int x = 0; +#ifdef __SSE2__ + const vfloat fourv = F2V(4.f); + const vfloat zerov = F2V(0.f); + const vfloat ydv = F2V(yd); + const vfloat p0 = F2V(mc.vignParam[0]); + const vfloat p1 = F2V(mc.vignParam[1]); + const vfloat p2 = F2V(mc.vignParam[2]); + const vfloat p3 = F2V(mc.vignParam[3]); + const vfloat x0v = F2V(mc.x0); + const vfloat rfxv = F2V(mc.rfx); - return F2V(1.f) + rsqr * (mc.vignParamv[0] + rsqr * (mc.vignParamv[1] - mc.vignParamv[2] * rsqr + mc.vignParamv[3] * rsqr * rsqr)); + vfloat xv = _mm_setr_ps(0.f, 1.f, 2.f, 3.f); + for (; x < width-3; x+=4) { + vfloat xdv = (xv - x0v) * rfxv; + vfloat rsqr = xdv * xdv + ydv; + vfloat vignFactorv = rsqr * (p0 + rsqr * (p1 - p2 * rsqr + p3 * rsqr * rsqr)); + vfloat valv = LVFU(line[x]); + valv += valv * vselfzero(vmaskf_gt(valv, zerov), vignFactorv); + STVFU(line[x], valv); + xv += fourv; + } +#endif // __SSE2__ + for (; x < width; x++) { + if (line[x] > 0) { + float xd = ((float)x - mc.x0) * mc.rfx; + const float* vignParam = mc.vignParam; + float rsqr = xd * xd + yd; + line[x] += line[x] * rsqr * (vignParam[0] + rsqr * ((vignParam[1]) - (vignParam[2]) * rsqr + (vignParam[3]) * rsqr * rsqr)); + } + } } -#endif + +SSEFUNCTION void LCPMapper::processVignetteLine3Channels(int width, int y, float *line) const +{ + // No need for swapXY, since vignette is in RAW and always before rotation + float yd = ((float)y - mc.y0) * mc.rfy; + yd *= yd; + const float* vignParam = mc.vignParam; + for (int x = 0; x < width; x++) { + float xd = ((float)x - mc.x0) * mc.rfx; + float rsqr = xd * xd + yd; + float vignetteFactor = rsqr * (vignParam[0] + rsqr * ((vignParam[1]) - (vignParam[2]) * rsqr + (vignParam[3]) * rsqr * rsqr)); + for(int c = 0;c < 3; ++c) { + if (line[3*x+c] > 0) { + line[3*x+c] += line[3*x+c] * vignetteFactor; + } + } + } +} + LCPProfile::LCPProfile(const Glib::ustring &fname) { diff --git a/rtengine/lcp.h b/rtengine/lcp.h index 74d06c958..449add15c 100644 --- a/rtengine/lcp.h +++ b/rtengine/lcp.h @@ -42,10 +42,6 @@ public: float x0, y0, fx, fy; // prepared params float rfx, rfy; float vignParam[4]; -#if defined( __SSE2__ ) && defined( __x86_64__ ) - vfloat vignParamv[4] ALIGNED16; - vfloat x0v, y0v, rfxv, rfyv; -#endif LCPModelCommon(); bool empty() const; // is it empty void print() const; // printf all values @@ -137,9 +133,8 @@ public: void correctDistortion(double& x, double& y) const; // MUST be the first stage void correctCA(double& x, double& y, int channel) const; float calcVignetteFac (int x, int y) const; // MUST be in RAW -#if defined( __SSE2__ ) && defined( __x86_64__ ) - vfloat calcVignetteFac(vfloat x, vfloat y) const; -#endif + void processVignetteLine(int width, int y, float *line) const; + void processVignetteLine3Channels(int width, int y, float *line) const; }; } #endif diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 47613cf1a..995e3d9bd 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -1771,31 +1771,13 @@ void RawImageSource::preprocess (const RAWParams &raw, const LensProfParams &le LCPMapper map(pLCPProf, max(idata->getFocalLen(), 1.0), idata->getFocalLen35mm(), idata->getFocusDist(), idata->getFNumber(), true, false, W, H, coarse, -1); if (ri->getSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS || ri->get_colors() == 1) { + #ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) #endif for (int y = 0; y < H; y++) { - int x = 0; - -#if defined( __SSE2__ ) && defined( __x86_64__ ) - vfloat yv = F2V(y); - vfloat fourv = F2V(4.f); - vfloat onev = F2V(1.f); - vfloat xv = _mm_set_ps(3,2,1,0); - for (; x < W-3; x+=4) { - vfloat vignFactorv = map.calcVignetteFac(xv, yv); - vfloat rawValv = LVFU(rawData[y][x]); - rawValv *= vself(vmaskf_gt(rawValv, ZEROV), vignFactorv, onev); - STVFU(rawData[y][x], rawValv); - xv += fourv; - } -#endif - for (; x < W; x++) { - if (rawData[y][x] > 0) { - rawData[y][x] *= map.calcVignetteFac(x, y); - } - } + map.processVignetteLine(W, y, rawData[y]); } } else if(ri->get_colors() == 3) { #ifdef _OPENMP @@ -1803,14 +1785,7 @@ void RawImageSource::preprocess (const RAWParams &raw, const LensProfParams &le #endif for (int y = 0; y < H; y++) { - for (int x = 0; x < W; x++) { - float vignFactor = map.calcVignetteFac(x, y); - for(int c = 0;c < 3; ++c) { - if (rawData[y][3 * x + c] > 0) { - rawData[y][3 * x + c] *= vignFactor; - } - } - } + map.processVignetteLine3Channels(W, y, rawData[y]); } } }