diff --git a/rtdata/languages/default b/rtdata/languages/default index 5c0273d3f..02ae8aff1 100644 --- a/rtdata/languages/default +++ b/rtdata/languages/default @@ -1763,6 +1763,7 @@ TP_RAW_1PASSMEDIUM;1-Pass (Medium) TP_RAW_3PASSBEST;3-Pass (Best) TP_RAW_AHD;AHD TP_RAW_AMAZE;AMaZE +TP_RAW_AMAZEVNG4;AMaZE+VNG4 TP_RAW_DCB;DCB TP_RAW_DCBENHANCE;DCB enhancement TP_RAW_DCBITERATIONS;Number of DCB iterations diff --git a/rtengine/amaze_vng4_demosaic_RT.cc b/rtengine/amaze_vng4_demosaic_RT.cc index f6b109bb6..b27c26df0 100644 --- a/rtengine/amaze_vng4_demosaic_RT.cc +++ b/rtengine/amaze_vng4_demosaic_RT.cc @@ -29,104 +29,11 @@ #include "rtengine.h" #include "rawimagesource.h" #include "rt_math.h" -#include "sleef.c" -#include "opthelper.h" -#include "jaggedarray.h" -#include "gauss.h" +#define BENCHMARK #include "StopWatch.h" +#include "rt_algo.h" using namespace std; -namespace { - -float calcBlendFactor(float val, float threshold) { - // sigmoid function - // result is in ]0;1] range - // inflexion point is at (x, y) (threshold, 0.5) - return 1.f / (1.f + xexpf(16.f - 16.f * val / threshold)); -} - -#ifdef __SSE2__ -vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { - // sigmoid function - // result is in ]0;1] range - // inflexion point is at (x, y) (threshold, 0.5) - const vfloat onev = F2V(1.f); - const vfloat c16v = F2V(16.f); - return onev / (onev + xexpf(c16v - c16v * valv / thresholdv)); -} -#endif - -void buildBlendMask(float** luminance, rtengine::JaggedArray &blend, int W, int H, float contrastThreshold, float amount = 1.f) { -BENCHFUN - - if(contrastThreshold == 0.f) { - for(int j = 0; j < H; ++j) { - for(int i = 0; i < W; ++i) { - blend[j][i] = 1.f; - } - } - } else { - constexpr float scale = 0.0625f / 327.68f; -#ifdef _OPENMP - #pragma omp parallel -#endif - { -#ifdef __SSE2__ - const vfloat contrastThresholdv = F2V(contrastThreshold); - const vfloat scalev = F2V(scale); - const vfloat amountv = F2V(amount); -#endif -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif - - for(int j = 2; j < H - 2; ++j) { - int i = 2; -#ifdef __SSE2__ - for(; i < W - 5; i += 4) { - vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + - SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; - - STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); - } -#endif - for(; i < W - 2; ++i) { - - float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + - rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; - - blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); - } - } -#ifdef _OPENMP - #pragma omp single -#endif - { - // upper border - for(int j = 0; j < 2; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[2][i]; - } - } - // lower border - for(int j = H - 2; j < H; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[H-3][i]; - } - } - for(int j = 0; j < H; ++j) { - // left border - blend[j][0] = blend[j][1] = blend[j][2]; - // right border - blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3]; - } - } - // blur blend mask to smooth transitions - gaussianBlur(blend, blend, W, H, 2.0); - } - } -} -} namespace rtengine { @@ -148,11 +55,9 @@ void RawImageSource::amaze_vng4_demosaic_RT(int winw, int winh, array2D & }; #pragma omp parallel { - float a[winw] ALIGNED16; - float b[winw] ALIGNED16; #pragma omp for for(int i = 0; i < winh; ++i) { - Color::RGB2Lab(redTmp[i], greenTmp[i], blueTmp[i], L[i], a, b, xyz_rgb, winw); + Color::RGB2L(redTmp[i], greenTmp[i], blueTmp[i], L[i], xyz_rgb, winw); } } // calculate contrast based blend factors to reduce sharpening in regions with low contrast diff --git a/rtengine/color.cc b/rtengine/color.cc index c4e3b6a68..5fb8d27d8 100644 --- a/rtengine/color.cc +++ b/rtengine/color.cc @@ -1837,6 +1837,45 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, } } +void Color::RGB2L(float *R, float *G, float *B, float *L, const float wp[3][3], int width) +{ + +#ifdef __SSE2__ + vfloat minvalfv = F2V(0.f); + vfloat maxvalfv = F2V(MAXVALF); +#endif + int i = 0; + +#ifdef __SSE2__ + for(;i < width - 3; i+=4) { + const vfloat rv = LVFU(R[i]); + const vfloat gv = LVFU(G[i]); + const vfloat bv = LVFU(B[i]); + const vfloat yv = F2V(wp[1][0]) * rv + F2V(wp[1][1]) * gv + F2V(wp[1][2]) * bv; + + vmask maxMask = vmaskf_gt(yv, maxvalfv); + vmask minMask = vmaskf_lt(yv, minvalfv); + if (_mm_movemask_ps((vfloat)maxMask) || _mm_movemask_ps((vfloat)minMask)) { + // take slower code path for all 4 pixels if one of the values is > MAXVALF. Still faster than non SSE2 version + for(int k = 0; k < 4; ++k) { + float y = yv[k]; + L[i + k] = computeXYZ2LabY(y); + } + } else { + STVFU(L[i], cachefy[yv]); + } + } +#endif + for(;i < width; ++i) { + const float rv = R[i]; + const float gv = G[i]; + const float bv = B[i]; + float y = wp[1][0] * rv + wp[1][1] * gv + wp[1][2] * bv; + + L[i] = computeXYZ2LabY(y); + } +} + void Color::XYZ2Lab(float X, float Y, float Z, float &L, float &a, float &b) { diff --git a/rtengine/color.h b/rtengine/color.h index 5616a4079..3310f6fa6 100644 --- a/rtengine/color.h +++ b/rtengine/color.h @@ -611,6 +611,7 @@ public: */ static void XYZ2Lab(float x, float y, float z, float &L, float &a, float &b); static void RGB2Lab(float *X, float *Y, float *Z, float *L, float *a, float *b, const float wp[3][3], int width); + static void RGB2L(float *X, float *Y, float *Z, float *L, const float wp[3][3], int width); /** * @brief Convert Lab in Yuv diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index e328edd1b..e3ecf41b3 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -26,99 +26,11 @@ #include "opthelper.h" //#define BENCHMARK #include "StopWatch.h" +#include "rt_algo.h" using namespace std; namespace { -float calcBlendFactor(float val, float threshold) { - // sigmoid function - // result is in ]0;1] range - // inflexion point is at (x, y) (threshold, 0.5) - return 1.f / (1.f + xexpf(16.f - 16.f * val / threshold)); -} - -#ifdef __SSE2__ -vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { - // sigmoid function - // result is in ]0;1] range - // inflexion point is at (x, y) (threshold, 0.5) - const vfloat onev = F2V(1.f); - const vfloat c16v = F2V(16.f); - return onev / (onev + xexpf(c16v - c16v * valv / thresholdv)); -} -#endif - -void buildBlendMask(float** luminance, rtengine::JaggedArray &blend, int W, int H, float contrastThreshold, float amount = 1.f) { -BENCHFUN - - if(contrastThreshold == 0.f) { - for(int j = 0; j < H; ++j) { - for(int i = 0; i < W; ++i) { - blend[j][i] = 1.f; - } - } - } else { - constexpr float scale = 0.0625f / 327.68f; -#ifdef _OPENMP - #pragma omp parallel -#endif - { -#ifdef __SSE2__ - const vfloat contrastThresholdv = F2V(contrastThreshold); - const vfloat scalev = F2V(scale); - const vfloat amountv = F2V(amount); -#endif -#ifdef _OPENMP - #pragma omp for schedule(dynamic,16) -#endif - - for(int j = 2; j < H - 2; ++j) { - int i = 2; -#ifdef __SSE2__ - for(; i < W - 5; i += 4) { - vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + - SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; - - STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); - } -#endif - for(; i < W - 2; ++i) { - - float contrast = sqrtf(SQR(luminance[j][i+1] - luminance[j][i-1]) + SQR(luminance[j+1][i] - luminance[j-1][i]) + - SQR(luminance[j][i+2] - luminance[j][i-2]) + SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; - - blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); - } - } -#ifdef _OPENMP - #pragma omp single -#endif - { - // upper border - for(int j = 0; j < 2; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[2][i]; - } - } - // lower border - for(int j = H - 2; j < H; ++j) { - for(int i = 2; i < W - 2; ++i) { - blend[j][i] = blend[H-3][i]; - } - } - for(int j = 0; j < H; ++j) { - // left border - blend[j][0] = blend[j][1] = blend[j][2]; - // right border - blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3]; - } - } - // blur blend mask to smooth transitions - gaussianBlur(blend, blend, W, H, 2.0); - } - } -} - void sharpenHaloCtrl (float** luminance, float** blurmap, float** base, float** blend, int W, int H, const SharpeningParams &sharpenParam) { diff --git a/rtengine/procparams.cc b/rtengine/procparams.cc index dfcb91ebb..ec6df6ace 100644 --- a/rtengine/procparams.cc +++ b/rtengine/procparams.cc @@ -2446,7 +2446,8 @@ const std::vector& RAWParams::BayerSensor::getMethodStrings() "fast", "mono", "none", - "pixelshift" + "pixelshift", + "amazevng4" }; return method_strings; } diff --git a/rtengine/procparams.h b/rtengine/procparams.h index 467ecf1e3..e6a253955 100644 --- a/rtengine/procparams.h +++ b/rtengine/procparams.h @@ -1235,7 +1235,8 @@ struct RAWParams { FAST, MONO, NONE, - PIXELSHIFT + PIXELSHIFT, + AMAZEVNG4 }; enum class PSMotionCorrectionMethod { diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 80c7d8b86..9f217e357 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -2073,13 +2073,14 @@ void RawImageSource::demosaic(const RAWParams &raw) ahd_demosaic (); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::AMAZE) ) { amaze_demosaic_RT (0, 0, W, H, rawData, red, green, blue); + } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::AMAZEVNG4) ) { + amaze_vng4_demosaic_RT (W, H, rawData, red, green, blue); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::PIXELSHIFT) ) { pixelshift(0, 0, W, H, raw.bayersensor, currFrame, ri->get_maker(), ri->get_model(), raw.expos); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::DCB) ) { dcb_demosaic(raw.bayersensor.dcb_iterations, raw.bayersensor.dcb_enhance); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::EAHD)) { - amaze_vng4_demosaic_RT (W, H, rawData, red, green, blue); -// eahd_demosaic (); + eahd_demosaic (); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::IGV)) { igv_interpolate(W, H); } else if (raw.bayersensor.method == RAWParams::BayerSensor::getMethodString(RAWParams::BayerSensor::Method::LMMSE)) { diff --git a/rtengine/rt_algo.cc b/rtengine/rt_algo.cc index 79508cfb3..21c8eac12 100644 --- a/rtengine/rt_algo.cc +++ b/rtengine/rt_algo.cc @@ -25,7 +25,31 @@ #include #endif +#include "gauss.h" +#include "opthelper.h" #include "rt_algo.h" +#include "rt_math.h" +#include "sleef.c" + +namespace { +float calcBlendFactor(float val, float threshold) { + // sigmoid function + // result is in ]0;1] range + // inflexion point is at (x, y) (threshold, 0.5) + return 1.f / (1.f + xexpf(16.f - 16.f * val / threshold)); +} + +#ifdef __SSE2__ +vfloat calcBlendFactor(vfloat valv, vfloat thresholdv) { + // sigmoid function + // result is in ]0;1] range + // inflexion point is at (x, y) (threshold, 0.5) + const vfloat onev = F2V(1.f); + const vfloat c16v = F2V(16.f); + return onev / (onev + xexpf(c16v - c16v * valv / thresholdv)); +} +#endif +} namespace rtengine { @@ -164,4 +188,73 @@ void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& maxOut += minVal; } +void buildBlendMask(float** luminance, rtengine::JaggedArray &blend, int W, int H, float contrastThreshold, float amount) { + + if(contrastThreshold == 0.f) { + for(int j = 0; j < H; ++j) { + for(int i = 0; i < W; ++i) { + blend[j][i] = 1.f; + } + } + } else { + constexpr float scale = 0.0625f / 327.68f; +#ifdef _OPENMP + #pragma omp parallel +#endif + { +#ifdef __SSE2__ + const vfloat contrastThresholdv = F2V(contrastThreshold); + const vfloat scalev = F2V(scale); + const vfloat amountv = F2V(amount); +#endif +#ifdef _OPENMP + #pragma omp for schedule(dynamic,16) +#endif + + for(int j = 2; j < H - 2; ++j) { + int i = 2; +#ifdef __SSE2__ + for(; i < W - 5; i += 4) { + vfloat contrastv = vsqrtf(SQRV(LVFU(luminance[j][i+1]) - LVFU(luminance[j][i-1])) + SQRV(LVFU(luminance[j+1][i]) - LVFU(luminance[j-1][i])) + + SQRV(LVFU(luminance[j][i+2]) - LVFU(luminance[j][i-2])) + SQRV(LVFU(luminance[j+2][i]) - LVFU(luminance[j-2][i]))) * scalev; + + STVFU(blend[j][i], amountv * calcBlendFactor(contrastv, contrastThresholdv)); + } +#endif + for(; i < W - 2; ++i) { + + float contrast = sqrtf(rtengine::SQR(luminance[j][i+1] - luminance[j][i-1]) + rtengine::SQR(luminance[j+1][i] - luminance[j-1][i]) + + rtengine::SQR(luminance[j][i+2] - luminance[j][i-2]) + rtengine::SQR(luminance[j+2][i] - luminance[j-2][i])) * scale; + + blend[j][i] = amount * calcBlendFactor(contrast, contrastThreshold); + } + } +#ifdef _OPENMP + #pragma omp single +#endif + { + // upper border + for(int j = 0; j < 2; ++j) { + for(int i = 2; i < W - 2; ++i) { + blend[j][i] = blend[2][i]; + } + } + // lower border + for(int j = H - 2; j < H; ++j) { + for(int i = 2; i < W - 2; ++i) { + blend[j][i] = blend[H-3][i]; + } + } + for(int j = 0; j < H; ++j) { + // left border + blend[j][0] = blend[j][1] = blend[j][2]; + // right border + blend[j][W - 2] = blend[j][W - 1] = blend[j][W - 3]; + } + } + // blur blend mask to smooth transitions + gaussianBlur(blend, blend, W, H, 2.0); + } + } +} } diff --git a/rtengine/rt_algo.h b/rtengine/rt_algo.h index 2630bbf41..1aac26c3e 100644 --- a/rtengine/rt_algo.h +++ b/rtengine/rt_algo.h @@ -20,10 +20,10 @@ #pragma once #include +#include "jaggedarray.h" namespace rtengine { - void findMinMaxPercentile(const float* data, size_t size, float minPrct, float& minOut, float maxPrct, float& maxOut, bool multiThread = true); - +void buildBlendMask(float** luminance, rtengine::JaggedArray &blend, int W, int H, float contrastThreshold, float amount = 1.f); }