diff --git a/rtengine/amaze_demosaic_RT.cc b/rtengine/amaze_demosaic_RT.cc index 6da291303..a34e8e7f7 100644 --- a/rtengine/amaze_demosaic_RT.cc +++ b/rtengine/amaze_demosaic_RT.cc @@ -32,6 +32,7 @@ #include "../rtgui/multilangmgr.h" #include "sleef.c" #include "opthelper.h" +#include "median.h" #include "StopWatch.h" namespace rtengine @@ -552,7 +553,7 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, vfloat hwtv = onev + temp2v / ( epsv + Ginthv + LVF( cfa[indx])); vmask hcdmask = vmaskf_gt( nsgnv * hcdv, ZEROV ); vfloat hcdoldv = hcdv; - vfloat tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Ginthv, LVFU(cfa[indx - 1]), LVFU(cfa[indx + 1]) )); + vfloat tempv = nsgnv * (LVF(cfa[indx]) - median( Ginthv, LVFU(cfa[indx - 1]), LVFU(cfa[indx + 1]) )); hcdv = vself( vmaskf_lt( temp2v, -(LVF(cfa[indx]) + Ginthv)), tempv, vintpf(hwtv, hcdv, tempv)); hcdv = vself( hcdmask, hcdv, hcdoldv ); hcdv = vself( vmaskf_gt( Ginthv, clip_ptv), tempv, hcdv); @@ -563,7 +564,7 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, vfloat vwtv = onev + temp2v / ( epsv + Gintvv + LVF( cfa[indx])); vmask vcdmask = vmaskf_gt( nsgnv * vcdv, ZEROV ); vfloat vcdoldv = vcdv; - tempv = nsgnv * (LVF(cfa[indx]) - ULIMV( Gintvv, LVF(cfa[indx - v1]), LVF(cfa[indx + v1]) )); + tempv = nsgnv * (LVF(cfa[indx]) - median( Gintvv, LVF(cfa[indx - v1]), LVF(cfa[indx + v1]) )); vcdv = vself( vmaskf_lt( temp2v, -(LVF(cfa[indx]) + Gintvv)), tempv, vintpf(vwtv, vcdv, tempv)); vcdv = vself( vcdmask, vcdv, vcdoldv ); vcdv = vself( vmaskf_gt( Gintvv, clip_ptv), tempv, vcdv); @@ -1071,13 +1072,13 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, vfloat rbmv = (wtsev * rbnwv + wtnwv * rbsev) / (wtsev + wtnwv); - temp1v = ULIMV(rbmv , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])); + temp1v = median(rbmv , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])); vfloat wtv = vmul2f(cfav - rbmv) / (epsv + rbmv + cfav); temp2v = vintpf(wtv, rbmv, temp1v); temp2v = vself(vmaskf_lt(rbmv + rbmv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbmv, cfav), temp2v, rbmv); - STVFU(rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])), temp2v )); + STVFU(rbm[indx1], vself(vmaskf_gt(temp2v, clip_ptv), median(temp2v , LC2VFU(cfa[indx - m1]), LC2VFU(cfa[indx + m1])), temp2v )); temp1v = LC2VFU(cfa[indx + p1]); @@ -1096,13 +1097,13 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, vfloat rbpv = (wtnev * rbswv + wtswv * rbnev) / (wtnev + wtswv); - temp1v = ULIMV(rbpv , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])); + temp1v = median(rbpv , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])); wtv = vmul2f(cfav - rbpv) / (epsv + rbpv + cfav); temp2v = vintpf(wtv, rbpv, temp1v); temp2v = vself(vmaskf_lt(rbpv + rbpv, cfav), temp1v, temp2v); temp2v = vself(vmaskf_lt(rbpv, cfav), temp2v, rbpv); - STVFU(rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), ULIMV(temp2v , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])), temp2v )); + STVFU(rbp[indx1], vself(vmaskf_gt(temp2v, clip_ptv), median(temp2v , LC2VFU(cfa[indx - p1]), LC2VFU(cfa[indx + p1])), temp2v )); vfloat rbvarmv = epssqv + (gausseven0v * (LVFU(Dgrbsq1m[(indx - v1) >> 1]) + LVFU(Dgrbsq1m[(indx - 1) >> 1]) + LVFU(Dgrbsq1m[(indx + 1) >> 1]) + LVFU(Dgrbsq1m[(indx + v1) >> 1])) + gausseven1v * (LVFU(Dgrbsq1m[(indx - v2 - 1) >> 1]) + LVFU(Dgrbsq1m[(indx - v2 + 1) >> 1]) + LVFU(Dgrbsq1m[(indx - 2 - v1) >> 1]) + LVFU(Dgrbsq1m[(indx + 2 - v1) >> 1]) + @@ -1254,12 +1255,12 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, gdv = vself(vmaskf_lt(vabsf(onev - crdv), arthreshv), gdv, gd2v); vfloat Gintvv = (LC2VFU(dirwts0[indx - v1]) * gdv + LC2VFU(dirwts0[indx + v1]) * guv) / (LC2VFU(dirwts0[indx + v1]) + LC2VFU(dirwts0[indx - v1])); - vfloat Gint1v = ULIMV(Gintvv , LC2VFU(cfa[indx - v1]), LC2VFU(cfa[indx + v1])); + vfloat Gint1v = median(Gintvv , LC2VFU(cfa[indx - v1]), LC2VFU(cfa[indx + v1])); vfloat vwtv = vmul2f(rbintv - Gintvv) / (epsv + Gintvv + rbintv); vfloat Gint2v = vintpf(vwtv, Gintvv, Gint1v); Gint1v = vself(vmaskf_lt(vmul2f(Gintvv), rbintv), Gint1v, Gint2v); Gintvv = vself(vmaskf_lt(Gintvv, rbintv), Gint1v, Gintvv); - Gintvv = vself(vmaskf_gt(Gintvv, clip_ptv), ULIMV(Gintvv, LC2VFU(cfa[indx - v1]), LC2VFU(cfa[indx + v1])), Gintvv); + Gintvv = vself(vmaskf_gt(Gintvv, clip_ptv), median(Gintvv, LC2VFU(cfa[indx - v1]), LC2VFU(cfa[indx + v1])), Gintvv); vfloat crlv = vmul2f(LC2VFU(cfa[indx - 1])) / (epsv + rbintv + LVFU(rbint[(indx1 - 1)])); vfloat glv = rbintv * crlv; @@ -1272,12 +1273,12 @@ SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, grv = vself(vmaskf_lt(vabsf(onev - crrv), arthreshv), grv, gr2v); vfloat Ginthv = (LC2VFU(dirwts1[indx - 1]) * grv + LC2VFU(dirwts1[indx + 1]) * glv) / (LC2VFU(dirwts1[indx - 1]) + LC2VFU(dirwts1[indx + 1])); - vfloat Gint1h = ULIMV(Ginthv , LC2VFU(cfa[indx - 1]), LC2VFU(cfa[indx + 1])); + vfloat Gint1h = median(Ginthv , LC2VFU(cfa[indx - 1]), LC2VFU(cfa[indx + 1])); vfloat hwtv = vmul2f(rbintv - Ginthv) / (epsv + Ginthv + rbintv); vfloat Gint2h = vintpf(hwtv, Ginthv, Gint1h); Gint1h = vself(vmaskf_lt(vmul2f(Ginthv), rbintv), Gint1h, Gint2h); Ginthv = vself(vmaskf_lt(Ginthv, rbintv), Gint1h, Ginthv); - Ginthv = vself(vmaskf_gt(Ginthv, clip_ptv), ULIMV(Ginthv, LC2VFU(cfa[indx - 1]), LC2VFU(cfa[indx + 1])), Ginthv); + Ginthv = vself(vmaskf_gt(Ginthv, clip_ptv), median(Ginthv, LC2VFU(cfa[indx - 1]), LC2VFU(cfa[indx + 1])), Ginthv); vfloat greenv = vself(copymask, vintpf(LVFU(hvwt[indx1]), Gintvv, Ginthv), LC2VFU(rgbgreen[indx])); STC2VFU(rgbgreen[indx], greenv); diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index 72f801bab..b5e05ba53 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -36,6 +36,7 @@ #include "procparams.h" #include "sleef.c" #include "opthelper.h" +#include "median.h" //#define BENCHMARK #include "StopWatch.h" #ifdef _OPENMP @@ -58,9 +59,6 @@ namespace rtengine #define x00625(a) xdivf(a, 4) #define x0125(a) xdivf(a, 3) - -#define PIX_SORT(a,b) { if ((a)>(b)) {temp=(a);(a)=(b);(b)=temp;} } -#define PIX_SORTV(av,bv) tempv = _mm_min_ps(av,bv); bv = _mm_max_ps(av,bv); av = tempv; extern const Settings* settings; //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -929,7 +927,7 @@ void RawImageSource::ppg_demosaic() } d = dir[i = diff[0] > diff[1]]; - pix[0][1] = ULIM(static_cast(guess[i] >> 2), pix[d][1], pix[-d][1]); + pix[0][1] = median(static_cast(guess[i] >> 2), pix[d][1], pix[-d][1]); } if(plistener) { @@ -1253,8 +1251,8 @@ void RawImageSource::jdl_interpolate_omp() // from "Lassus" for (col = 6 + (FC(row, 2) & 1), indx = row * width + col, c = FC(row, col) / 2; col < u - 6; col += 2, indx += 2) { f[0] = 1.f + 78.f * SQR((float)dif[indx][0]) + 69.f * (SQR((float) dif[indx - v][0]) + SQR((float)dif[indx + v][0])) + 51.f * (SQR((float)dif[indx - x][0]) + SQR((float)dif[indx + x][0])) + 21.f * (SQR((float)dif[indx - z][0]) + SQR((float)dif[indx + z][0])) - 6.f * SQR((float)dif[indx - v][0] + dif[indx][0] + dif[indx + v][0]) - 10.f * (SQR((float)dif[indx - x][0] + dif[indx - v][0] + dif[indx][0]) + SQR((float)dif[indx][0] + dif[indx + v][0] + dif[indx + x][0])) - 7.f * (SQR((float)dif[indx - z][0] + dif[indx - x][0] + dif[indx - v][0]) + SQR((float)dif[indx + v][0] + dif[indx + x][0] + dif[indx + z][0])); f[1] = 1.f + 78.f * SQR((float)dif[indx][1]) + 69.f * (SQR((float)dif[indx - 2][1]) + SQR((float)dif[indx + 2][1])) + 51.f * (SQR((float)dif[indx - 4][1]) + SQR((float)dif[indx + 4][1])) + 21.f * (SQR((float)dif[indx - 6][1]) + SQR((float)dif[indx + 6][1])) - 6.f * SQR((float)dif[indx - 2][1] + dif[indx][1] + dif[indx + 2][1]) - 10.f * (SQR((float)dif[indx - 4][1] + dif[indx - 2][1] + dif[indx][1]) + SQR((float)dif[indx][1] + dif[indx + 2][1] + dif[indx + 4][1])) - 7.f * (SQR((float)dif[indx - 6][1] + dif[indx - 4][1] + dif[indx - 2][1]) + SQR((float)dif[indx + 2][1] + dif[indx + 4][1] + dif[indx + 6][1])); - g[0] = ULIM(0.725f * dif[indx][0] + 0.1375f * dif[indx - v][0] + 0.1375f * dif[indx + v][0], dif[indx - v][0], dif[indx + v][0]); - g[1] = ULIM(0.725f * dif[indx][1] + 0.1375f * dif[indx - 2][1] + 0.1375f * dif[indx + 2][1], dif[indx - 2][1], dif[indx + 2][1]); + g[0] = median(0.725f * dif[indx][0] + 0.1375f * dif[indx - v][0] + 0.1375f * dif[indx + v][0], static_cast(dif[indx - v][0]), static_cast(dif[indx + v][0])); + g[1] = median(0.725f * dif[indx][1] + 0.1375f * dif[indx - 2][1] + 0.1375f * dif[indx + 2][1], static_cast(dif[indx - 2][1]), static_cast(dif[indx + 2][1])); chr[indx][c] = (f[1] * g[0] + f[0] * g[1]) / (f[0] + f[1]); } @@ -1268,10 +1266,10 @@ void RawImageSource::jdl_interpolate_omp() // from "Lassus" f[1] = 1.f / (float)(1.f + fabs((float)chr[indx - u + 1][c] - chr[indx + u - 1][c]) + fabs((float)chr[indx - u + 1][c] - chr[indx - w + 3][c]) + fabs((float)chr[indx + u - 1][c] - chr[indx - w + 3][c])); f[2] = 1.f / (float)(1.f + fabs((float)chr[indx + u - 1][c] - chr[indx - u + 1][c]) + fabs((float)chr[indx + u - 1][c] - chr[indx + w + 3][c]) + fabs((float)chr[indx - u + 1][c] - chr[indx + w - 3][c])); f[3] = 1.f / (float)(1.f + fabs((float)chr[indx + u + 1][c] - chr[indx - u - 1][c]) + fabs((float)chr[indx + u + 1][c] - chr[indx + w - 3][c]) + fabs((float)chr[indx - u - 1][c] - chr[indx + w + 3][c])); - g[0] = ULIM(chr[indx - u - 1][c], chr[indx - w - 1][c], chr[indx - u - 3][c]); - g[1] = ULIM(chr[indx - u + 1][c], chr[indx - w + 1][c], chr[indx - u + 3][c]); - g[2] = ULIM(chr[indx + u - 1][c], chr[indx + w - 1][c], chr[indx + u - 3][c]); - g[3] = ULIM(chr[indx + u + 1][c], chr[indx + w + 1][c], chr[indx + u + 3][c]); + g[0] = median(chr[indx - u - 1][c], chr[indx - w - 1][c], chr[indx - u - 3][c]); + g[1] = median(chr[indx - u + 1][c], chr[indx - w + 1][c], chr[indx - u + 3][c]); + g[2] = median(chr[indx + u - 1][c], chr[indx + w - 1][c], chr[indx + u - 3][c]); + g[3] = median(chr[indx + u + 1][c], chr[indx + w + 1][c], chr[indx + u + 3][c]); chr[indx][c] = (f[0] * g[0] + f[1] * g[1] + f[2] * g[2] + f[3] * g[3]) / (f[0] + f[1] + f[2] + f[3]); image[indx][1] = CLIP(image[indx][2 - d] + chr[indx][1 - c]); image[indx][d] = CLIP(image[indx][1] - chr[indx][c]); @@ -1459,7 +1457,7 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, int i float Y = v0 + xdiv2f(rix[0][0]); if (rix[4][0] > 1.75f * Y) { - rix[0][0] = ULIM(rix[0][0], rix[4][ -1], rix[4][ 1]); + rix[0][0] = median(rix[0][0], rix[4][ -1], rix[4][ 1]); } else { rix[0][0] = LIM(rix[0][0], 0.0f, 1.0f); } @@ -1471,7 +1469,7 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, int i Y = v0 + xdiv2f(rix[1][0]); if (rix[4][0] > 1.75f * Y) { - rix[1][0] = ULIM(rix[1][0], rix[4][-w1], rix[4][w1]); + rix[1][0] = median(rix[1][0], rix[4][-w1], rix[4][w1]); } else { rix[1][0] = LIM(rix[1][0], 0.0f, 1.0f); } @@ -1764,43 +1762,22 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, int i int d = c + 3 - (c == 0 ? 0 : 1); int cc = 1; #ifdef __SSE2__ - __m128 p1v, p2v, p3v, p4v, p5v, p6v, p7v, p8v, p9v, tempv; for (; cc < cc1 - 4; cc += 4) { rix[d] = qix[d] + rr * cc1 + cc; rix[c] = qix[c] + rr * cc1 + cc; rix[1] = qix[1] + rr * cc1 + cc; // Assign 3x3 differential color values - p1v = LVFU(rix[c][-w1 - 1]) - LVFU(rix[1][-w1 - 1]); - p2v = LVFU(rix[c][-w1]) - LVFU(rix[1][-w1]); - p3v = LVFU(rix[c][-w1 + 1]) - LVFU(rix[1][-w1 + 1]); - p4v = LVFU(rix[c][ -1]) - LVFU(rix[1][ -1]); - p5v = LVFU(rix[c][ 0]) - LVFU(rix[1][ 0]); - p6v = LVFU(rix[c][ 1]) - LVFU(rix[1][ 1]); - p7v = LVFU(rix[c][ w1 - 1]) - LVFU(rix[1][ w1 - 1]); - p8v = LVFU(rix[c][ w1]) - LVFU(rix[1][ w1]); - p9v = LVFU(rix[c][ w1 + 1]) - LVFU(rix[1][ w1 + 1]); - // Sort for median of 9 values - PIX_SORTV(p2v, p3v); - PIX_SORTV(p5v, p6v); - PIX_SORTV(p8v, p9v); - PIX_SORTV(p1v, p2v); - PIX_SORTV(p4v, p5v); - PIX_SORTV(p7v, p8v); - PIX_SORTV(p2v, p3v); - PIX_SORTV(p5v, p6v); - PIX_SORTV(p8v, p9v); - p4v = _mm_max_ps(p1v, p4v); - p6v = _mm_min_ps(p6v, p9v); - PIX_SORTV(p5v, p8v); - p7v = _mm_max_ps(p4v, p7v); - p5v = _mm_max_ps(p5v, p2v); - p3v = _mm_min_ps(p3v, p6v); - p5v = _mm_min_ps(p5v, p8v); - PIX_SORTV(p5v, p3v); - p5v = _mm_max_ps(p7v, p5v); - p5v = _mm_min_ps(p3v, p5v); - _mm_storeu_ps(&rix[d][0], p5v); + const __m128 p1v = LVFU(rix[c][-w1 - 1]) - LVFU(rix[1][-w1 - 1]); + const __m128 p2v = LVFU(rix[c][-w1]) - LVFU(rix[1][-w1]); + const __m128 p3v = LVFU(rix[c][-w1 + 1]) - LVFU(rix[1][-w1 + 1]); + const __m128 p4v = LVFU(rix[c][ -1]) - LVFU(rix[1][ -1]); + const __m128 p5v = LVFU(rix[c][ 0]) - LVFU(rix[1][ 0]); + const __m128 p6v = LVFU(rix[c][ 1]) - LVFU(rix[1][ 1]); + const __m128 p7v = LVFU(rix[c][ w1 - 1]) - LVFU(rix[1][ w1 - 1]); + const __m128 p8v = LVFU(rix[c][ w1]) - LVFU(rix[1][ w1]); + const __m128 p9v = LVFU(rix[c][ w1 + 1]) - LVFU(rix[1][ w1 + 1]); + _mm_storeu_ps(&rix[d][0], median(p1v, p2v, p3v, p4v, p5v, p6v, p7v, p8v, p9v)); } #endif @@ -1811,36 +1788,16 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, int i rix[c] = qix[c] + rr * cc1 + cc; rix[1] = qix[1] + rr * cc1 + cc; // Assign 3x3 differential color values - float p1 = rix[c][-w1 - 1] - rix[1][-w1 - 1]; - float p2 = rix[c][-w1] - rix[1][-w1]; - float p3 = rix[c][-w1 + 1] - rix[1][-w1 + 1]; - float p4 = rix[c][ -1] - rix[1][ -1]; - float p5 = rix[c][ 0] - rix[1][ 0]; - float p6 = rix[c][ 1] - rix[1][ 1]; - float p7 = rix[c][ w1 - 1] - rix[1][ w1 - 1]; - float p8 = rix[c][ w1] - rix[1][ w1]; - float p9 = rix[c][ w1 + 1] - rix[1][ w1 + 1]; - // Sort for median of 9 values - PIX_SORT(p2, p3); - PIX_SORT(p5, p6); - PIX_SORT(p8, p9); - PIX_SORT(p1, p2); - PIX_SORT(p4, p5); - PIX_SORT(p7, p8); - PIX_SORT(p2, p3); - PIX_SORT(p5, p6); - PIX_SORT(p8, p9); - PIX_SORT(p1, p4); - PIX_SORT(p6, p9); - PIX_SORT(p5, p8); - PIX_SORT(p4, p7); - PIX_SORT(p2, p5); - PIX_SORT(p3, p6); - PIX_SORT(p5, p8); - PIX_SORT(p5, p3); - PIX_SORT(p7, p5); - PIX_SORT(p5, p3); - rix[d][0] = p5; + const float p1 = rix[c][-w1 - 1] - rix[1][-w1 - 1]; + const float p2 = rix[c][-w1] - rix[1][-w1]; + const float p3 = rix[c][-w1 + 1] - rix[1][-w1 + 1]; + const float p4 = rix[c][ -1] - rix[1][ -1]; + const float p5 = rix[c][ 0] - rix[1][ 0]; + const float p6 = rix[c][ 1] - rix[1][ 1]; + const float p7 = rix[c][ w1 - 1] - rix[1][ w1 - 1]; + const float p8 = rix[c][ w1] - rix[1][ w1]; + const float p9 = rix[c][ w1 + 1] - rix[1][ w1 + 1]; + rix[d][0] = median(p1, p2, p3, p4, p5, p6, p7, p8, p9); } } } @@ -2158,8 +2115,8 @@ SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh) egv = LIMV(epssqv + c78v * SQRV(LVFU(hdif[indx1])) + c69v * (SQRV(LVFU(hdif[indx1 - h1])) + SQRV(LVFU(hdif[indx1 + h1]))) + c51v * (SQRV(LVFU(hdif[indx1 - h2])) + SQRV(LVFU(hdif[indx1 + h2]))) + c21v * (SQRV(LVFU(hdif[indx1 - h3])) + SQRV(LVFU(hdif[indx1 + h3]))) - c6v * SQRV(LVFU(hdif[indx1 - h1]) + LVFU(hdif[indx1]) + LVFU(hdif[indx1 + h1])) - c10v * (SQRV(LVFU(hdif[indx1 - h2]) + LVFU(hdif[indx1 - h1]) + LVFU(hdif[indx1])) + SQRV(LVFU(hdif[indx1]) + LVFU(hdif[indx1 + h1]) + LVFU(hdif[indx1 + h2]))) - c7v * (SQRV(LVFU(hdif[indx1 - h3]) + LVFU(hdif[indx1 - h2]) + LVFU(hdif[indx1 - h1])) + SQRV(LVFU(hdif[indx1 + h1]) + LVFU(hdif[indx1 + h2]) + LVFU(hdif[indx1 + h3]))), zerov, onev); //Limit chrominance using H/V neighbourhood - nvv = ULIMV(d725v * LVFU(vdif[indx1]) + d1375v * LVFU(vdif[indx1 - v1]) + d1375v * LVFU(vdif[indx1 + v1]), LVFU(vdif[indx1 - v1]), LVFU(vdif[indx1 + v1])); - evv = ULIMV(d725v * LVFU(hdif[indx1]) + d1375v * LVFU(hdif[indx1 - h1]) + d1375v * LVFU(hdif[indx1 + h1]), LVFU(hdif[indx1 - h1]), LVFU(hdif[indx1 + h1])); + nvv = median(d725v * LVFU(vdif[indx1]) + d1375v * LVFU(vdif[indx1 - v1]) + d1375v * LVFU(vdif[indx1 + v1]), LVFU(vdif[indx1 - v1]), LVFU(vdif[indx1 + v1])); + evv = median(d725v * LVFU(hdif[indx1]) + d1375v * LVFU(hdif[indx1 - h1]) + d1375v * LVFU(hdif[indx1 + h1]), LVFU(hdif[indx1 - h1]), LVFU(hdif[indx1 + h1])); //Chrominance estimation tempv = (egv * nvv + ngv * evv) / (ngv + egv); _mm_storeu_ps(&(chr[d][indx1]), tempv); @@ -2176,8 +2133,8 @@ SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh) eg = LIM(epssq + 78.0f * SQR(hdif[indx1]) + 69.0f * (SQR(hdif[indx1 - h1]) + SQR(hdif[indx1 + h1])) + 51.0f * (SQR(hdif[indx1 - h2]) + SQR(hdif[indx1 + h2])) + 21.0f * (SQR(hdif[indx1 - h3]) + SQR(hdif[indx1 + h3])) - 6.0f * SQR(hdif[indx1 - h1] + hdif[indx1] + hdif[indx1 + h1]) - 10.0f * (SQR(hdif[indx1 - h2] + hdif[indx1 - h1] + hdif[indx1]) + SQR(hdif[indx1] + hdif[indx1 + h1] + hdif[indx1 + h2])) - 7.0f * (SQR(hdif[indx1 - h3] + hdif[indx1 - h2] + hdif[indx1 - h1]) + SQR(hdif[indx1 + h1] + hdif[indx1 + h2] + hdif[indx1 + h3])), 0.f, 1.f); //Limit chrominance using H/V neighbourhood - nv = ULIM(0.725f * vdif[indx1] + 0.1375f * vdif[indx1 - v1] + 0.1375f * vdif[indx1 + v1], vdif[indx1 - v1], vdif[indx1 + v1]); - ev = ULIM(0.725f * hdif[indx1] + 0.1375f * hdif[indx1 - h1] + 0.1375f * hdif[indx1 + h1], hdif[indx1 - h1], hdif[indx1 + h1]); + nv = median(0.725f * vdif[indx1] + 0.1375f * vdif[indx1 - v1] + 0.1375f * vdif[indx1 + v1], vdif[indx1 - v1], vdif[indx1 + v1]); + ev = median(0.725f * hdif[indx1] + 0.1375f * hdif[indx1 - h1] + 0.1375f * hdif[indx1 + h1], hdif[indx1 - h1], hdif[indx1 + h1]); //Chrominance estimation chr[d][indx1] = (eg * nv + ng * ev) / (ng + eg); //Green channel population @@ -2207,10 +2164,10 @@ SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh) swgv = onev / (epsv + vabsf(LVFU(chr[c][(indx + v1 - h1) >> 1]) - LVFU(chr[c][(indx + v3 + h3) >> 1])) + vabsf(LVFU(chr[c][(indx - v1 + h1) >> 1]) - LVFU(chr[c][(indx + v3 - h3) >> 1]))); segv = onev / (epsv + vabsf(LVFU(chr[c][(indx + v1 + h1) >> 1]) - LVFU(chr[c][(indx + v3 - h3) >> 1])) + vabsf(LVFU(chr[c][(indx - v1 - h1) >> 1]) - LVFU(chr[c][(indx + v3 + h3) >> 1]))); //Limit NW,NE,SW,SE Color differences - nwvv = ULIMV(LVFU(chr[c][(indx - v1 - h1) >> 1]), LVFU(chr[c][(indx - v3 - h1) >> 1]), LVFU(chr[c][(indx - v1 - h3) >> 1])); - nevv = ULIMV(LVFU(chr[c][(indx - v1 + h1) >> 1]), LVFU(chr[c][(indx - v3 + h1) >> 1]), LVFU(chr[c][(indx - v1 + h3) >> 1])); - swvv = ULIMV(LVFU(chr[c][(indx + v1 - h1) >> 1]), LVFU(chr[c][(indx + v3 - h1) >> 1]), LVFU(chr[c][(indx + v1 - h3) >> 1])); - sevv = ULIMV(LVFU(chr[c][(indx + v1 + h1) >> 1]), LVFU(chr[c][(indx + v3 + h1) >> 1]), LVFU(chr[c][(indx + v1 + h3) >> 1])); + nwvv = median(LVFU(chr[c][(indx - v1 - h1) >> 1]), LVFU(chr[c][(indx - v3 - h1) >> 1]), LVFU(chr[c][(indx - v1 - h3) >> 1])); + nevv = median(LVFU(chr[c][(indx - v1 + h1) >> 1]), LVFU(chr[c][(indx - v3 + h1) >> 1]), LVFU(chr[c][(indx - v1 + h3) >> 1])); + swvv = median(LVFU(chr[c][(indx + v1 - h1) >> 1]), LVFU(chr[c][(indx + v3 - h1) >> 1]), LVFU(chr[c][(indx + v1 - h3) >> 1])); + sevv = median(LVFU(chr[c][(indx + v1 + h1) >> 1]), LVFU(chr[c][(indx + v3 + h1) >> 1]), LVFU(chr[c][(indx + v1 + h3) >> 1])); //Interpolate chrominance: R@B and B@R tempv = (nwgv * nwvv + negv * nevv + swgv * swvv + segv * sevv) / (nwgv + negv + swgv + segv); _mm_storeu_ps( &(chr[c][indx >> 1]), tempv); @@ -2223,10 +2180,10 @@ SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh) swg = 1.0f / (eps + fabsf(chr[c][(indx + v1 - h1) >> 1] - chr[c][(indx + v3 + h3) >> 1]) + fabsf(chr[c][(indx - v1 + h1) >> 1] - chr[c][(indx + v3 - h3) >> 1])); seg = 1.0f / (eps + fabsf(chr[c][(indx + v1 + h1) >> 1] - chr[c][(indx + v3 - h3) >> 1]) + fabsf(chr[c][(indx - v1 - h1) >> 1] - chr[c][(indx + v3 + h3) >> 1])); //Limit NW,NE,SW,SE Color differences - nwv = ULIM(chr[c][(indx - v1 - h1) >> 1], chr[c][(indx - v3 - h1) >> 1], chr[c][(indx - v1 - h3) >> 1]); - nev = ULIM(chr[c][(indx - v1 + h1) >> 1], chr[c][(indx - v3 + h1) >> 1], chr[c][(indx - v1 + h3) >> 1]); - swv = ULIM(chr[c][(indx + v1 - h1) >> 1], chr[c][(indx + v3 - h1) >> 1], chr[c][(indx + v1 - h3) >> 1]); - sev = ULIM(chr[c][(indx + v1 + h1) >> 1], chr[c][(indx + v3 + h1) >> 1], chr[c][(indx + v1 + h3) >> 1]); + nwv = median(chr[c][(indx - v1 - h1) >> 1], chr[c][(indx - v3 - h1) >> 1], chr[c][(indx - v1 - h3) >> 1]); + nev = median(chr[c][(indx - v1 + h1) >> 1], chr[c][(indx - v3 + h1) >> 1], chr[c][(indx - v1 + h3) >> 1]); + swv = median(chr[c][(indx + v1 - h1) >> 1], chr[c][(indx + v3 - h1) >> 1], chr[c][(indx + v1 - h3) >> 1]); + sev = median(chr[c][(indx + v1 + h1) >> 1], chr[c][(indx + v3 + h1) >> 1], chr[c][(indx + v1 + h3) >> 1]); //Interpolate chrominance: R@B and B@R chr[c][indx >> 1] = (nwg * nwv + neg * nev + swg * swv + seg * sev) / (nwg + neg + swg + seg); } @@ -2730,10 +2687,10 @@ void RawImageSource::ahd_demosaic(int winx, int winy, int winw, int winh) pix = image + (row * width + col); val = 0.25 * ((pix[-1][1] + pix[0][c] + pix[1][1]) * 2 - pix[-2][c] - pix[2][c]) ; - rgb[0][row - top][col - left][1] = ULIM(static_cast(val), pix[-1][1], pix[1][1]); + rgb[0][row - top][col - left][1] = median(static_cast(val), pix[-1][1], pix[1][1]); val = 0.25 * ((pix[-width][1] + pix[0][c] + pix[width][1]) * 2 - pix[-2 * width][c] - pix[2 * width][c]) ; - rgb[1][row - top][col - left][1] = ULIM(static_cast(val), pix[-width][1], pix[width][1]); + rgb[1][row - top][col - left][1] = median(static_cast(val), pix[-width][1], pix[width][1]); } } @@ -3215,39 +3172,21 @@ void RawImageSource::refinement_lassus(int PassCount) g[4] = (f[0] * g[0] + f[1] * g[1] + f[2] * g[2] + f[3] * g[3]) / (f[0] + f[1] + f[2] + f[3]); - float p[9]; - p[0] = (pix[-u - 1][1] - pix[-u - 1][c]); - p[1] = (pix[-u + 0][1] - pix[-u + 0][c]); - p[2] = (pix[-u + 1][1] - pix[-u + 1][c]); - p[3] = (pix[+0 - 1][1] - pix[+0 - 1][c]); - p[4] = (pix[+0 + 0][1] - pix[+0 + 0][c]); - p[5] = (pix[+0 + 1][1] - pix[+0 + 1][c]); - p[6] = (pix[+u - 1][1] - pix[+u - 1][c]); - p[7] = (pix[+u + 0][1] - pix[+u + 0][c]); - p[8] = (pix[+u + 1][1] - pix[+u + 1][c]); + const std::array p = { + pix[-u - 1][1] - pix[-u - 1][c], + pix[-u + 0][1] - pix[-u + 0][c], + pix[-u + 1][1] - pix[-u + 1][c], + pix[+0 - 1][1] - pix[+0 - 1][c], + pix[+0 + 0][1] - pix[+0 + 0][c], + pix[+0 + 1][1] - pix[+0 + 1][c], + pix[+u - 1][1] - pix[+u - 1][c], + pix[+u + 0][1] - pix[+u + 0][c], + pix[+u + 1][1] - pix[+u + 1][c] + }; - // sort p[] - float temp; // used in PIX_SORT macro; - PIX_SORT(p[1], p[2]); - PIX_SORT(p[4], p[5]); - PIX_SORT(p[7], p[8]); - PIX_SORT(p[0], p[1]); - PIX_SORT(p[3], p[4]); - PIX_SORT(p[6], p[7]); - PIX_SORT(p[1], p[2]); - PIX_SORT(p[4], p[5]); - PIX_SORT(p[7], p[8]); - PIX_SORT(p[0], p[3]); - PIX_SORT(p[5], p[8]); - PIX_SORT(p[4], p[7]); - PIX_SORT(p[3], p[6]); - PIX_SORT(p[1], p[4]); - PIX_SORT(p[2], p[5]); - PIX_SORT(p[4], p[7]); - PIX_SORT(p[4], p[2]); - PIX_SORT(p[6], p[4]); - PIX_SORT(p[4], p[2]); - pix[0][c] = LIM(pix[0][1] - (1.30f * g[4] - 0.30f * (pix[0][1] - pix[0][c])), 0.99f * (pix[0][1] - p[4]), 1.01f * (pix[0][1] - p[4])); + const float med = median(p); + + pix[0][c] = LIM(pix[0][1] - (1.30f * g[4] - 0.30f * (pix[0][1] - pix[0][c])), 0.99f * (pix[0][1] - med), 1.01f * (pix[0][1] - med)); } } diff --git a/rtengine/median.h b/rtengine/median.h index 13a79e381..8ac572179 100644 --- a/rtengine/median.h +++ b/rtengine/median.h @@ -42,11 +42,13 @@ inline T median(std::array array) return std::max(std::min(array[0], array[1]), std::min(array[2], std::max(array[0], array[1]))); } +#ifdef __SSE2__ template<> inline vfloat median(std::array array) { return vmaxf(vminf(array[0], array[1]), vminf(array[2], vmaxf(array[0], array[1]))); } +#endif template inline T median(std::array array) @@ -65,6 +67,7 @@ inline T median(std::array array) return std::max(array[1], tmp); } +#ifdef __SSE2__ template<> inline vfloat median(std::array array) { @@ -81,6 +84,7 @@ inline vfloat median(std::array array) tmp = vminf(array[2], array[3]); return vmaxf(array[1], tmp); } +#endif template inline T median(std::array array) @@ -112,6 +116,7 @@ inline T median(std::array array) return std::min(array[3], array[4]); } +#ifdef __SSE2__ template<> inline vfloat median(std::array array) { @@ -141,6 +146,7 @@ inline vfloat median(std::array array) array[3] = vmaxf(tmp, array[3]); return vminf(array[3], array[4]); } +#endif template inline T median(std::array array) @@ -185,6 +191,7 @@ inline T median(std::array array) return std::min(array[4], array[2]); } +#ifdef __SSE2__ template<> inline vfloat median(std::array array) { @@ -227,6 +234,7 @@ inline vfloat median(std::array array) array[4] = vmaxf(array[6], tmp); return vminf(array[4], array[2]); } +#endif template inline T median(std::array array) @@ -441,6 +449,7 @@ inline T median(std::array array) return std::max(tmp, array[12]); } +#ifdef __SSE2__ template<> inline vfloat median(std::array array) { @@ -653,6 +662,7 @@ inline vfloat median(std::array array) tmp = vminf(array[10], array[20]); return vmaxf(tmp, array[12]); } +#endif template inline T median(T arg, ARGS... args) @@ -699,54 +709,3 @@ d4 = vmaxf(d3,d4);\ d3 = vmaxf(d0,temp);\ d2 = vminf(d2,d5);\ } - - -#define MEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \ -{\ -t0 = std::min(s0,s5);\ -t5 = std::max(s0,s5);\ -t3 = std::max(t0,s3);\ -t0 = std::min(t0,s3);\ -t1 = std::min(s1,s6);\ -t6 = std::max(s1,s6);\ -t2 = std::min(s2,s4);\ -t4 = std::max(s2,s4);\ -t1 = std::max(t0,t1);\ -median = std::min(t3,t5);\ -t5 = std::max(t3,t5);\ -t3 = median;\ -median = std::min(t2,t6);\ -t6 = std::max(t2,t6);\ -t3 = std::max(median,t3);\ -t3 = std::min(t3,t6);\ -t4 = std::min(t4,t5);\ -median = std::min(t1,t4);\ -t4 = std::max(t1,t4);\ -t3 = std::max(median,t3);\ -median = std::min(t3,t4);\ -} - -#define VMEDIAN7(s0,s1,s2,s3,s4,s5,s6,t0,t1,t2,t3,t4,t5,t6,median) \ -{\ -t0 = vminf(s0,s5);\ -t5 = vmaxf(s0,s5);\ -t3 = vmaxf(t0,s3);\ -t0 = vminf(t0,s3);\ -t1 = vminf(s1,s6);\ -t6 = vmaxf(s1,s6);\ -t2 = vminf(s2,s4);\ -t4 = vmaxf(s2,s4);\ -t1 = vmaxf(t0,t1);\ -median = vminf(t3,t5);\ -t5 = vmaxf(t3,t5);\ -t3 = median;\ -median = vminf(t2,t6);\ -t6 = vmaxf(t2,t6);\ -t3 = vmaxf(median,t3);\ -t3 = vminf(t3,t6);\ -t4 = vminf(t4,t5);\ -median = vminf(t1,t4);\ -t4 = vmaxf(t1,t4);\ -t3 = vmaxf(median,t3);\ -median = vminf(t3,t4);\ -} diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index fa05e34c3..5d3566126 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -3483,12 +3483,11 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D for (int j = 1; j < W - 2; j += 2) { post1[0] = _mm_setr_ps(rbconv_I[px][j + 1], rbconv_Q[px][j + 1], 0, 0), post1[1] = _mm_setr_ps(rbconv_I[cx][j + 1], rbconv_Q[cx][j + 1], 0, 0), post1[2] = _mm_setr_ps(rbconv_I[nx][j + 1], rbconv_Q[nx][j + 1], 0, 0); VMIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); - vfloat medianval; - VMEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval); + vfloat medianval = median(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4]); rbout_I[cx][j] = medianval[0]; rbout_Q[cx][j] = medianval[1]; post2[0] = _mm_setr_ps(rbconv_I[px][j + 2], rbconv_Q[px][j + 2], 0, 0), post2[1] = _mm_setr_ps(rbconv_I[cx][j + 2], rbconv_Q[cx][j + 2], 0, 0), post2[2] = _mm_setr_ps(rbconv_I[nx][j + 2], rbconv_Q[nx][j + 2], 0, 0); - VMEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], medianval); + medianval = median(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4]); rbout_I[cx][j + 1] = medianval[0]; rbout_Q[cx][j + 1] = medianval[1]; std::swap(pre1, post1); @@ -3513,9 +3512,9 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D for (int j = 1; j < W - 2; j += 2) { post1[0] = rbconv_I[px][j + 1], post1[1] = rbconv_I[cx][j + 1], post1[2] = rbconv_I[nx][j + 1]; MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); - MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j]); + rbout_I[cx][j] = median(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4]); post2[0] = rbconv_I[px][j + 2], post2[1] = rbconv_I[cx][j + 2], post2[2] = rbconv_I[nx][j + 2]; - MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_I[cx][j + 1]); + rbout_I[cx][j + 1] = median(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4]); std::swap(pre1, post1); std::swap(pre2, post2); } @@ -3534,9 +3533,9 @@ void RawImageSource::processFalseColorCorrectionThread (Imagefloat* im, array2D for (int j = 1; j < W - 2; j += 2) { post1[0] = rbconv_Q[px][j + 1], post1[1] = rbconv_Q[cx][j + 1], post1[2] = rbconv_Q[nx][j + 1]; MIDDLE4OF6(pre2[0], pre2[1], pre2[2], post1[0], post1[1], post1[2], middle[0], middle[1], middle[2], middle[3], middle[4], middle[5], temp[0]); - MEDIAN7(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j]); + rbout_Q[cx][j] = median(pre1[0], pre1[1], pre1[2], middle[1], middle[2], middle[3], middle[4]); post2[0] = rbconv_Q[px][j + 2], post2[1] = rbconv_Q[cx][j + 2], post2[2] = rbconv_Q[nx][j + 2]; - MEDIAN7(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4], temp[0], temp[1], temp[2], temp[3], temp[4], temp[5], temp[6], rbout_Q[cx][j + 1]); + rbout_Q[cx][j + 1] = median(post2[0], post2[1], post2[2], middle[1], middle[2], middle[3], middle[4]); std::swap(pre1, post1); std::swap(pre2, post2); } diff --git a/rtengine/rt_math.h b/rtengine/rt_math.h index 702fb5360..0836c8be7 100644 --- a/rtengine/rt_math.h +++ b/rtengine/rt_math.h @@ -42,12 +42,6 @@ inline _Tp LIM01(const _Tp& a) return std::max(_Tp(0), std::min(a, _Tp(1))); } -template -inline const _Tp ULIM(const _Tp& a, const _Tp& b, const _Tp& c) -{ - return ((b < c) ? LIM(a, b, c) : LIM(a, c, b)); -} - template inline _Tp CLIP(const _Tp& a) { diff --git a/rtengine/sleefsseavx.c b/rtengine/sleefsseavx.c index 1d15e1e41..a9f49f143 100644 --- a/rtengine/sleefsseavx.c +++ b/rtengine/sleefsseavx.c @@ -1334,12 +1334,6 @@ static INLINE vfloat LIMV( vfloat a, vfloat b, vfloat c ) { return vmaxf( b, vminf(a,c)); } -static INLINE vfloat ULIMV( vfloat a, vfloat b, vfloat c ){ - // made to clamp a in range [b,c] but in fact it's also the median of a,b,c, which means that the result is independent on order of arguments - // ULIMV(a,b,c) = ULIMV(a,c,b) = ULIMV(b,a,c) = ULIMV(b,c,a) = ULIMV(c,a,b) = ULIMV(c,b,a) - return vmaxf(vminf(a,b), vminf(vmaxf(a,b),c)); -} - static INLINE vfloat SQRV(vfloat a){ return a * a; }