Cleaned gauss code and included some speedups
This commit is contained in:
parent
33ea7156b8
commit
a3c20daa46
@ -8,7 +8,7 @@ link_directories ("${PROJECT_SOURCE_DIR}/rtexif" ${EXTRA_LIBDIR} ${GTHREAD_LIBRA
|
||||
set (CAMCONSTSFILE "camconst.json")
|
||||
|
||||
set (RTENGINESOURCEFILES safegtk.cc colortemp.cc curves.cc flatcurves.cc diagonalcurves.cc dcraw.cc iccstore.cc color.cc
|
||||
dfmanager.cc ffmanager.cc rawimage.cc image8.cc image16.cc imagefloat.cc imagedata.cc imageio.cc improcfun.cc init.cc dcrop.cc
|
||||
dfmanager.cc ffmanager.cc gauss.cc rawimage.cc image8.cc image16.cc imagefloat.cc imagedata.cc imageio.cc improcfun.cc init.cc dcrop.cc
|
||||
loadinitial.cc procparams.cc rawimagesource.cc demosaic_algos.cc shmap.cc simpleprocess.cc refreshmap.cc
|
||||
fast_demo.cc amaze_demosaic_RT.cc CA_correct_RT.cc cfa_linedn_RT.cc green_equil_RT.cc hilite_recon.cc expo_before_b.cc
|
||||
stdimagesource.cc myfile.cc iccjpeg.cc hlmultipliers.cc improccoordinator.cc editbuffer.cc coord.cc
|
||||
|
@ -65,40 +65,12 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (src->a, tmp1->a, src->W, src->H, radius);
|
||||
gaussianBlur<float> (src->b, tmp1->b, src->W, src->H, radius);
|
||||
gaussianBlur (src->a, tmp1->a, src->W, src->H, radius);
|
||||
gaussianBlur (src->b, tmp1->b, src->W, src->H, radius);
|
||||
}
|
||||
|
||||
float chromave = 0.0f;
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
if( chCurve ) {
|
||||
// vectorized precalculation of the atan2 values
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int j;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < height; i++ )
|
||||
{
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps(&fringe[i * width + j], xatan2f(LVFU(src->b[i][j]), LVFU(src->a[i][j])));
|
||||
}
|
||||
|
||||
for(; j < width; j++) {
|
||||
fringe[i * width + j] = xatan2f(src->b[i][j], src->a[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
@ -109,6 +81,23 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < height; i++ ) {
|
||||
#ifdef __SSE2__
|
||||
|
||||
// vectorized per row precalculation of the atan2 values
|
||||
if (chCurve) {
|
||||
int k = 0;
|
||||
|
||||
for(; k < width - 3; k += 4) {
|
||||
STVFU(fringe[i * width + k], xatan2f(LVFU(src->b[i][k]), LVFU(src->a[i][k])));
|
||||
}
|
||||
|
||||
for(; k < width; k++) {
|
||||
fringe[i * width + k] = xatan2f(src->b[i][k], src->a[i][k]);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __SSE2__
|
||||
|
||||
for(int j = 0; j < width; j++) {
|
||||
if (chCurve) {
|
||||
#ifdef __SSE2__
|
||||
@ -144,19 +133,21 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
__m128 sumv = _mm_set1_ps( chromave );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
__m128 sumv = F2V( chromave );
|
||||
__m128 onev = F2V( 1.0f );
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for(int j = 0; j < width * height - 3; j += 4) {
|
||||
_mm_storeu_ps( &fringe[j], onev / (LVFU(fringe[j]) + sumv));
|
||||
STVFU(fringe[j], onev / (LVFU(fringe[j]) + sumv));
|
||||
}
|
||||
}
|
||||
|
||||
for(int j = width * height - (width * height) % 4; j < width * height; j++) {
|
||||
fringe[j] = 1.f / (fringe[j] + chromave);
|
||||
#pragma omp single
|
||||
|
||||
for(int j = width * height - (width * height) % 4; j < width * height; j++) {
|
||||
fringe[j] = 1.f / (fringe[j] + chromave);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
@ -191,8 +182,6 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
tmp1->b[i][j] = src->b[i][j];
|
||||
|
||||
//test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
|
||||
/*if (100*tmp1->L[i][j]>50*src->L[i][j] && \*/
|
||||
/*1000*abs(tmp1->L[i][j]-src->L[i][j])>thresh*(tmp1->L[i][j]+src->L[i][j]) && \*/
|
||||
if (fringe[i * width + j] < threshfactor) {
|
||||
float atot = 0.f;
|
||||
float btot = 0.f;
|
||||
@ -218,8 +207,6 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
tmp1->b[i][j] = src->b[i][j];
|
||||
|
||||
//test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
|
||||
/*if (100*tmp1->L[i][j]>50*src->L[i][j] && \*/
|
||||
/*1000*abs(tmp1->L[i][j]-src->L[i][j])>thresh*(tmp1->L[i][j]+src->L[i][j]) && \*/
|
||||
if (fringe[i * width + j] < threshfactor) {
|
||||
float atot = 0.f;
|
||||
float btot = 0.f;
|
||||
@ -245,8 +232,6 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
||||
tmp1->b[i][j] = src->b[i][j];
|
||||
|
||||
//test for pixel darker than some fraction of neighborhood ave, near an edge, more saturated than average
|
||||
/*if (100*tmp1->L[i][j]>50*src->L[i][j] && \*/
|
||||
/*1000*abs(tmp1->L[i][j]-src->L[i][j])>thresh*(tmp1->L[i][j]+src->L[i][j]) && \*/
|
||||
if (fringe[i * width + j] < threshfactor) {
|
||||
float atot = 0.f;
|
||||
float btot = 0.f;
|
||||
@ -355,7 +340,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
#ifdef __SSE2__
|
||||
int j;
|
||||
vfloat2 sincosvalv;
|
||||
__m128 piidv = _mm_set1_ps(piid);
|
||||
__m128 piidv = F2V(piid);
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -366,8 +351,8 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
|
||||
for (j = 0; j < width - 3; j += 4) {
|
||||
sincosvalv = xsincosf(piidv * LVFU(src->h_p[i][j]));
|
||||
_mm_storeu_ps(&sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
|
||||
_mm_storeu_ps(&srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
|
||||
STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
|
||||
STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
@ -392,8 +377,8 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur<float> (srbb, tmbb, src->W, src->H, radius);
|
||||
gaussianBlur (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur (srbb, tmbb, src->W, src->H, radius);
|
||||
}
|
||||
|
||||
float chromave = 0.0f;
|
||||
@ -414,7 +399,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
for(int i = 0; i < height; i++ )
|
||||
{
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps(&fringe[i * width + j], xatan2f(LVFU(srbb[i][j]), LVFU(sraa[i][j])));
|
||||
STVFU(fringe[i * width + j], xatan2f(LVFU(srbb[i][j]), LVFU(sraa[i][j])));
|
||||
}
|
||||
|
||||
for(; j < width; j++) {
|
||||
@ -470,14 +455,14 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
__m128 sumv = _mm_set1_ps( chromave + eps2 );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
__m128 sumv = F2V( chromave + eps2 );
|
||||
__m128 onev = F2V( 1.0f );
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for(int j = 0; j < width * height - 3; j += 4) {
|
||||
_mm_storeu_ps( &fringe[j], onev / (LVFU(fringe[j]) + sumv));
|
||||
STVFU(fringe[j], onev / (LVFU(fringe[j]) + sumv));
|
||||
}
|
||||
}
|
||||
|
||||
@ -599,7 +584,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
#ifdef __SSE2__
|
||||
int j;
|
||||
__m128 interav, interbv;
|
||||
__m128 piidv = _mm_set1_ps(piid);
|
||||
__m128 piidv = F2V(piid);
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -609,11 +594,11 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
||||
#ifdef __SSE2__
|
||||
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps( &dst->sh_p[i][j], LVFU(src->sh_p[i][j]));
|
||||
STVFU(dst->sh_p[i][j], LVFU(src->sh_p[i][j]));
|
||||
interav = LVFU(tmaa[i][j]);
|
||||
interbv = LVFU(tmbb[i][j]);
|
||||
_mm_storeu_ps(&dst->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
|
||||
_mm_storeu_ps(&dst->C_p[i][j], _mm_sqrt_ps(SQRV(interbv) + SQRV(interav)));
|
||||
STVFU(dst->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
|
||||
STVFU(dst->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav)));
|
||||
}
|
||||
|
||||
for(; j < width; j++) {
|
||||
@ -730,7 +715,7 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
#ifdef __SSE2__
|
||||
int j;
|
||||
vfloat2 sincosvalv;
|
||||
__m128 piidv = _mm_set1_ps(piid);
|
||||
__m128 piidv = F2V(piid);
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -741,8 +726,8 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
|
||||
for (j = 0; j < width - 3; j += 4) {
|
||||
sincosvalv = xsincosf(piidv * LVFU(src->h_p[i][j]));
|
||||
_mm_storeu_ps(&sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
|
||||
_mm_storeu_ps(&srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
|
||||
STVFU(sraa[i][j], LVFU(src->C_p[i][j])*sincosvalv.y);
|
||||
STVFU(srbb[i][j], LVFU(src->C_p[i][j])*sincosvalv.x);
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
@ -769,12 +754,12 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
{
|
||||
//chroma a and b
|
||||
if(mode == 2) { //choice of gaussian blur
|
||||
gaussianBlur<float> (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur<float> (srbb, tmbb, src->W, src->H, radius);
|
||||
gaussianBlur (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur (srbb, tmbb, src->W, src->H, radius);
|
||||
}
|
||||
|
||||
//luma sh_p
|
||||
gaussianBlur<float> (src->sh_p, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
|
||||
gaussianBlur (src->sh_p, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
|
||||
}
|
||||
|
||||
if(mode == 1) { //choice of median
|
||||
@ -859,8 +844,8 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
int j;
|
||||
#ifdef __SSE2__
|
||||
__m128 shfabsv, shmedv;
|
||||
__m128 shthrv = _mm_set1_ps(shthr);
|
||||
__m128 onev = _mm_set1_ps(1.0f);
|
||||
__m128 shthrv = F2V(shthr);
|
||||
__m128 onev = F2V(1.0f);
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for private(shfabs, shmed,i1,j1)
|
||||
@ -883,14 +868,14 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
|
||||
for (; j < width - 5; j += 4) {
|
||||
shfabsv = vabsf(LVFU(src->sh_p[i][j]) - LVFU(tmL[i][j]));
|
||||
shmedv = _mm_setzero_ps();
|
||||
shmedv = ZEROV;
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
shmedv += vabsf(LVFU(src->sh_p[i1][j1]) - LVFU(tmL[i1][j1]));
|
||||
}
|
||||
|
||||
_mm_storeu_ps( &badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, _mm_setzero_ps()));
|
||||
STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, ZEROV));
|
||||
}
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
@ -1082,15 +1067,15 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
||||
#endif
|
||||
{
|
||||
int j;
|
||||
__m128 sumv = _mm_set1_ps( chrommed + eps2 );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
__m128 sumv = F2V( chrommed + eps2 );
|
||||
__m128 onev = F2V( 1.0f );
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < height; i++) {
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps( &badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
|
||||
STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
|
||||
}
|
||||
|
||||
for(; j < width; j++) {
|
||||
@ -1341,7 +1326,7 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
#ifdef __SSE2__
|
||||
int j;
|
||||
// vfloat2 sincosvalv;
|
||||
// __m128 piidv = _mm_set1_ps(piid);
|
||||
// __m128 piidv = F2V(piid);
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -1351,8 +1336,8 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
#ifdef __SSE2__
|
||||
|
||||
for (j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps(&sraa[i][j], LVFU(src->a[i][j]));
|
||||
_mm_storeu_ps(&srbb[i][j], LVFU(src->b[i][j]));
|
||||
STVFU(sraa[i][j], LVFU(src->a[i][j]));
|
||||
STVFU(srbb[i][j], LVFU(src->b[i][j]));
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
@ -1377,12 +1362,12 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
{
|
||||
//chroma a and b
|
||||
if(mode >= 2) { //choice of gaussian blur
|
||||
gaussianBlur<float> (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur<float> (srbb, tmbb, src->W, src->H, radius);
|
||||
gaussianBlur (sraa, tmaa, src->W, src->H, radius);
|
||||
gaussianBlur (srbb, tmbb, src->W, src->H, radius);
|
||||
}
|
||||
|
||||
//luma sh_p
|
||||
gaussianBlur<float> (src->L, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
|
||||
gaussianBlur (src->L, tmL, src->W, src->H, 2.0);//low value to avoid artifacts
|
||||
}
|
||||
|
||||
if(mode == 1) { //choice of median
|
||||
@ -1467,8 +1452,8 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
int j;
|
||||
#ifdef __SSE2__
|
||||
__m128 shfabsv, shmedv;
|
||||
__m128 shthrv = _mm_set1_ps(shthr);
|
||||
__m128 onev = _mm_set1_ps(1.0f);
|
||||
__m128 shthrv = F2V(shthr);
|
||||
__m128 onev = F2V(1.0f);
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for private(shfabs, shmed,i1,j1)
|
||||
@ -1491,14 +1476,14 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
|
||||
for (; j < width - 5; j += 4) {
|
||||
shfabsv = vabsf(LVFU(src->L[i][j]) - LVFU(tmL[i][j]));
|
||||
shmedv = _mm_setzero_ps();
|
||||
shmedv = ZEROV;
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
shmedv += vabsf(LVFU(src->L[i1][j1]) - LVFU(tmL[i1][j1]));
|
||||
}
|
||||
|
||||
_mm_storeu_ps( &badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, _mm_setzero_ps()));
|
||||
STVFU(badpix[i * width + j], vself(vmaskf_gt(shfabsv, (shmedv - shfabsv)*shthrv), onev, ZEROV));
|
||||
}
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
@ -1690,15 +1675,15 @@ SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, d
|
||||
#endif
|
||||
{
|
||||
int j;
|
||||
__m128 sumv = _mm_set1_ps( chrommed + eps2 );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
__m128 sumv = F2V( chrommed + eps2 );
|
||||
__m128 onev = F2V( 1.0f );
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < height; i++) {
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
_mm_storeu_ps( &badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
|
||||
STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + sumv));
|
||||
}
|
||||
|
||||
for(; j < width; j++) {
|
||||
|
1301
rtengine/gauss.cc
Normal file
1301
rtengine/gauss.cc
Normal file
File diff suppressed because it is too large
Load Diff
795
rtengine/gauss.h
795
rtengine/gauss.h
@ -19,797 +19,8 @@
|
||||
#ifndef _GAUSS_H_
|
||||
#define _GAUSS_H_
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
#include "opthelper.h"
|
||||
#include "stdio.h"
|
||||
#include "boxblur.h"
|
||||
// classical filtering if the support window is small:
|
||||
enum eGaussType {GAUSS_STANDARD, GAUSS_MULT, GAUSS_DIV};
|
||||
|
||||
template<class T> void gaussHorizontal3 (T** src, T** dst, int W, int H, const float c0, const float c1)
|
||||
{
|
||||
void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, float *buffer = nullptr, eGaussType gausstype = GAUSS_STANDARD, float** buffer2 = nullptr);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H; i++) {
|
||||
T temp[W] ALIGNED16;
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
temp[j] = (T)(c1 * (src[i][j - 1] + src[i][j + 1]) + c0 * src[i][j]);
|
||||
}
|
||||
|
||||
dst[i][0] = src[i][0];
|
||||
memcpy (dst[i] + 1, temp + 1, (W - 2)*sizeof(T));
|
||||
|
||||
dst[i][W - 1] = src[i][W - 1];
|
||||
}
|
||||
}
|
||||
|
||||
template<class T> void gaussVertical3 (T** src, T** dst, int W, int H, const float c0, const float c1)
|
||||
{
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < W; i++) {
|
||||
T temp[H] ALIGNED16;
|
||||
|
||||
for (int j = 1; j < H - 1; j++) {
|
||||
temp[j] = (T)(c1 * (src[j - 1][i] + src[j + 1][i]) + c0 * src[j][i]);
|
||||
}
|
||||
|
||||
dst[0][i] = src[0][i];
|
||||
|
||||
for (int j = 1; j < H - 1; j++) {
|
||||
dst[j][i] = temp[j];
|
||||
}
|
||||
|
||||
dst[H - 1][i] = src[H - 1][i];
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
template<class T> SSEFUNCTION void gaussVertical3Sse (T** src, T** dst, int W, int H, const float c0, const float c1)
|
||||
{
|
||||
vfloat Tv, Tm1v, Tp1v;
|
||||
vfloat c0v, c1v;
|
||||
c0v = F2V(c0);
|
||||
c1v = F2V(c1);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < W - 3; i += 4) {
|
||||
Tm1v = LVFU( src[0][i] );
|
||||
STVFU( dst[0][i], Tm1v);
|
||||
|
||||
if (H > 1) {
|
||||
Tv = LVFU( src[1][i]);
|
||||
}
|
||||
|
||||
for (int j = 1; j < H - 1; j++) {
|
||||
Tp1v = LVFU( src[j + 1][i]);
|
||||
STVFU( dst[j][i], c1v * (Tp1v + Tm1v) + Tv * c0v);
|
||||
Tm1v = Tv;
|
||||
Tv = Tp1v;
|
||||
}
|
||||
|
||||
STVFU( dst[H - 1][i], LVFU( src[H - 1][i]));
|
||||
}
|
||||
|
||||
// Borders are done without SSE
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = W - (W % 4); i < W; i++) {
|
||||
dst[0][i] = src[0][i];
|
||||
|
||||
for (int j = 1; j < H - 1; j++) {
|
||||
dst[j][i] = c1 * (src[j - 1][i] + src[j + 1][i]) + c0 * src[j][i];
|
||||
}
|
||||
|
||||
dst[H - 1][i] = src[H - 1][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T> SSEFUNCTION void gaussHorizontal3Sse (T** src, T** dst, int W, int H, const float c0, const float c1)
|
||||
{
|
||||
float tmp[W][4] ALIGNED16;
|
||||
|
||||
vfloat Tv, Tm1v, Tp1v;
|
||||
vfloat c0v, c1v;
|
||||
c0v = F2V(c0);
|
||||
c1v = F2V(c1);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H - 3; i += 4) {
|
||||
dst[i][0] = src[i][0];
|
||||
dst[i + 1][0] = src[i + 1][0];
|
||||
dst[i + 2][0] = src[i + 2][0];
|
||||
dst[i + 3][0] = src[i + 3][0];
|
||||
Tm1v = _mm_set_ps( src[i][0], src[i + 1][0], src[i + 2][0], src[i + 3][0] );
|
||||
|
||||
if (W > 1) {
|
||||
Tv = _mm_set_ps( src[i][1], src[i + 1][1], src[i + 2][1], src[i + 3][1] );
|
||||
}
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
Tp1v = _mm_set_ps( src[i][j + 1], src[i + 1][j + 1], src[i + 2][j + 1], src[i + 3][j + 1] );
|
||||
STVF( tmp[j][0], c1v * (Tp1v + Tm1v) + Tv * c0v);
|
||||
Tm1v = Tv;
|
||||
Tv = Tp1v;
|
||||
}
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
dst[i + 3][j] = tmp[j][0];
|
||||
dst[i + 2][j] = tmp[j][1];
|
||||
dst[i + 1][j] = tmp[j][2];
|
||||
dst[i][j] = tmp[j][3];
|
||||
}
|
||||
|
||||
dst[i][W - 1] = src[i][W - 1];
|
||||
dst[i + 1][W - 1] = src[i + 1][W - 1];
|
||||
dst[i + 2][W - 1] = src[i + 2][W - 1];
|
||||
dst[i + 3][W - 1] = src[i + 3][W - 1];
|
||||
}
|
||||
|
||||
// Borders are done without SSE
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = H - (H % 4); i < H; i++) {
|
||||
dst[i][0] = src[i][0];
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
dst[i][j] = c1 * (src[i][j - 1] + src[i][j + 1]) + c0 * src[i][j];
|
||||
}
|
||||
|
||||
dst[i][W - 1] = src[i][W - 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// fast gaussian approximation if the support window is large
|
||||
template<class T> SSEFUNCTION void gaussHorizontalSse (T** src, T** dst, int W, int H, float sigma)
|
||||
{
|
||||
|
||||
if (sigma < 0.25) {
|
||||
// dont perform filtering
|
||||
if (src != dst)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i = 0; i < H; i++) {
|
||||
memcpy (dst[i], src[i], W * sizeof(T));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (sigma < 0.6) {
|
||||
// compute 3x3 kernel
|
||||
float c1 = exp (-1.0 / (2.0 * sigma * sigma));
|
||||
float csum = 2.0 * c1 + 1.0;
|
||||
c1 /= csum;
|
||||
float c0 = 1.0 / csum;
|
||||
gaussHorizontal3Sse<T> (src, dst, W, H, c0, c1);
|
||||
return;
|
||||
}
|
||||
|
||||
// coefficient calculation
|
||||
float q = 0.98711 * sigma - 0.96330;
|
||||
|
||||
if (sigma < 2.5) {
|
||||
q = 3.97156 - 4.14554 * sqrt (1.0 - 0.26891 * sigma);
|
||||
}
|
||||
|
||||
float b0 = 1.57825 + 2.44413 * q + 1.4281 * q * q + 0.422205 * q * q * q;
|
||||
float b1 = 2.44413 * q + 2.85619 * q * q + 1.26661 * q * q * q;
|
||||
float b2 = -1.4281 * q * q - 1.26661 * q * q * q;
|
||||
float b3 = 0.422205 * q * q * q;
|
||||
float B = 1.0 - (b1 + b2 + b3) / b0;
|
||||
|
||||
b1 /= b0;
|
||||
b2 /= b0;
|
||||
b3 /= b0;
|
||||
|
||||
// From: Bill Triggs, Michael Sdika: Boundary Conditions for Young-van Vliet Recursive Filtering
|
||||
float M[3][3];
|
||||
M[0][0] = -b3 * b1 + 1.0 - b3 * b3 - b2;
|
||||
M[0][1] = (b3 + b1) * (b2 + b3 * b1);
|
||||
M[0][2] = b3 * (b1 + b3 * b2);
|
||||
M[1][0] = b1 + b3 * b2;
|
||||
M[1][1] = -(b2 - 1.0) * (b2 + b3 * b1);
|
||||
M[1][2] = -(b3 * b1 + b3 * b3 + b2 - 1.0) * b3;
|
||||
M[2][0] = b3 * b1 + b2 + b1 * b1 - b2 * b2;
|
||||
M[2][1] = b1 * b2 + b3 * b2 * b2 - b1 * b3 * b3 - b3 * b3 * b3 - b3 * b2 + b3;
|
||||
M[2][2] = b3 * (b1 + b3 * b2);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) {
|
||||
M[i][j] *= (1.0 + b2 + (b1 - b3) * b3);
|
||||
M[i][j] /= (1.0 + b1 - b2 + b3) * (1.0 - b1 - b2 - b3);
|
||||
}
|
||||
|
||||
vfloat Rv;
|
||||
vfloat Tv, Tm2v, Tm3v;
|
||||
vfloat Bv, b1v, b2v, b3v;
|
||||
vfloat temp2W, temp2Wp1;
|
||||
float tmp[W][4] ALIGNED16;
|
||||
float tmpV[4] ALIGNED16;
|
||||
Bv = F2V(B);
|
||||
b1v = F2V(b1);
|
||||
b2v = F2V(b2);
|
||||
b3v = F2V(b3);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H - 3; i += 4) {
|
||||
tmpV[0] = src[i + 3][0];
|
||||
tmpV[1] = src[i + 2][0];
|
||||
tmpV[2] = src[i + 1][0];
|
||||
tmpV[3] = src[i][0];
|
||||
Tv = LVF(tmpV[0]);
|
||||
Rv = Tv * (Bv + b1v + b2v + b3v);
|
||||
Tm3v = Rv;
|
||||
STVF( tmp[0][0], Rv );
|
||||
|
||||
tmpV[0] = src[i + 3][1];
|
||||
tmpV[1] = src[i + 2][1];
|
||||
tmpV[2] = src[i + 1][1];
|
||||
tmpV[3] = src[i][1];
|
||||
Rv = LVF(tmpV[0]) * Bv + Rv * b1v + Tv * (b2v + b3v);
|
||||
Tm2v = Rv;
|
||||
STVF( tmp[1][0], Rv );
|
||||
|
||||
tmpV[0] = src[i + 3][2];
|
||||
tmpV[1] = src[i + 2][2];
|
||||
tmpV[2] = src[i + 1][2];
|
||||
tmpV[3] = src[i][2];
|
||||
Rv = LVF(tmpV[0]) * Bv + Rv * b1v + Tm3v * b2v + Tv * b3v;
|
||||
STVF( tmp[2][0], Rv );
|
||||
|
||||
for (int j = 3; j < W; j++) {
|
||||
Tv = Rv;
|
||||
Rv = _mm_set_ps(src[i][j], src[i + 1][j], src[i + 2][j], src[i + 3][j]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
|
||||
STVF( tmp[j][0], Rv );
|
||||
Tm3v = Tm2v;
|
||||
Tm2v = Tv;
|
||||
}
|
||||
|
||||
Tv = _mm_set_ps(src[i][W - 1], src[i + 1][W - 1], src[i + 2][W - 1], src[i + 3][W - 1]);
|
||||
|
||||
temp2Wp1 = Tv + F2V(M[2][0]) * (Rv - Tv) + F2V(M[2][1]) * ( Tm2v - Tv ) + F2V(M[2][2]) * (Tm3v - Tv);
|
||||
temp2W = Tv + F2V(M[1][0]) * (Rv - Tv) + F2V(M[1][1]) * (Tm2v - Tv) + F2V(M[1][2]) * (Tm3v - Tv);
|
||||
|
||||
Rv = Tv + F2V(M[0][0]) * (Rv - Tv) + F2V(M[0][1]) * (Tm2v - Tv) + F2V(M[0][2]) * (Tm3v - Tv);
|
||||
STVF( tmp[W - 1][0], Rv );
|
||||
|
||||
Tm2v = Bv * Tm2v + b1v * Rv + b2v * temp2W + b3v * temp2Wp1;
|
||||
STVF( tmp[W - 2][0], Tm2v );
|
||||
|
||||
Tm3v = Bv * Tm3v + b1v * Tm2v + b2v * Rv + b3v * temp2W;
|
||||
STVF( tmp[W - 3][0], Tm3v );
|
||||
|
||||
Tv = Rv;
|
||||
Rv = Tm3v;
|
||||
Tm3v = Tv;
|
||||
|
||||
for (int j = W - 4; j >= 0; j--) {
|
||||
Tv = Rv;
|
||||
Rv = LVF(tmp[j][0]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
|
||||
STVF( tmp[j][0], Rv );
|
||||
Tm3v = Tm2v;
|
||||
Tm2v = Tv;
|
||||
}
|
||||
|
||||
for (int j = 0; j < W; j++) {
|
||||
dst[i + 3][j] = tmp[j][0];
|
||||
dst[i + 2][j] = tmp[j][1];
|
||||
dst[i + 1][j] = tmp[j][2];
|
||||
dst[i][j] = tmp[j][3];
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
// Borders are done without SSE
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = H - (H % 4); i < H; i++) {
|
||||
tmp[0][0] = B * src[i][0] + b1 * src[i][0] + b2 * src[i][0] + b3 * src[i][0];
|
||||
tmp[1][0] = B * src[i][1] + b1 * tmp[0][0] + b2 * src[i][0] + b3 * src[i][0];
|
||||
tmp[2][0] = B * src[i][2] + b1 * tmp[1][0] + b2 * tmp[0][0] + b3 * src[i][0];
|
||||
|
||||
for (int j = 3; j < W; j++) {
|
||||
tmp[j][0] = B * src[i][j] + b1 * tmp[j - 1][0] + b2 * tmp[j - 2][0] + b3 * tmp[j - 3][0];
|
||||
}
|
||||
|
||||
float temp2Wm1 = src[i][W - 1] + M[0][0] * (tmp[W - 1][0] - src[i][W - 1]) + M[0][1] * (tmp[W - 2][0] - src[i][W - 1]) + M[0][2] * (tmp[W - 3][0] - src[i][W - 1]);
|
||||
float temp2W = src[i][W - 1] + M[1][0] * (tmp[W - 1][0] - src[i][W - 1]) + M[1][1] * (tmp[W - 2][0] - src[i][W - 1]) + M[1][2] * (tmp[W - 3][0] - src[i][W - 1]);
|
||||
float temp2Wp1 = src[i][W - 1] + M[2][0] * (tmp[W - 1][0] - src[i][W - 1]) + M[2][1] * (tmp[W - 2][0] - src[i][W - 1]) + M[2][2] * (tmp[W - 3][0] - src[i][W - 1]);
|
||||
|
||||
tmp[W - 1][0] = temp2Wm1;
|
||||
tmp[W - 2][0] = B * tmp[W - 2][0] + b1 * tmp[W - 1][0] + b2 * temp2W + b3 * temp2Wp1;
|
||||
tmp[W - 3][0] = B * tmp[W - 3][0] + b1 * tmp[W - 2][0] + b2 * tmp[W - 1][0] + b3 * temp2W;
|
||||
|
||||
for (int j = W - 4; j >= 0; j--) {
|
||||
tmp[j][0] = B * tmp[j][0] + b1 * tmp[j + 1][0] + b2 * tmp[j + 2][0] + b3 * tmp[j + 3][0];
|
||||
}
|
||||
|
||||
for (int j = 0; j < W; j++) {
|
||||
dst[i][j] = tmp[j][0];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// fast gaussian approximation if the support window is large
|
||||
|
||||
template<class T> void gaussHorizontal (T** src, T** dst, int W, int H, double sigma)
|
||||
{
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
if (sigma < 70) { // bigger sigma only with double precision
|
||||
gaussHorizontalSse<T> (src, dst, W, H, sigma);
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (sigma < 0.25) {
|
||||
// dont perform filtering
|
||||
if (src != dst)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i = 0; i < H; i++) {
|
||||
memcpy (dst[i], src[i], W * sizeof(T));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (sigma < 0.6) {
|
||||
// compute 3x3 kernel
|
||||
double c1 = exp (-1.0 / (2.0 * sigma * sigma));
|
||||
double csum = 2.0 * c1 + 1.0;
|
||||
c1 /= csum;
|
||||
double c0 = 1.0 / csum;
|
||||
gaussHorizontal3<T> (src, dst, W, H, c0, c1);
|
||||
return;
|
||||
}
|
||||
|
||||
// coefficient calculation
|
||||
double q = 0.98711 * sigma - 0.96330;
|
||||
|
||||
if (sigma < 2.5) {
|
||||
q = 3.97156 - 4.14554 * sqrt (1.0 - 0.26891 * sigma);
|
||||
}
|
||||
|
||||
double b0 = 1.57825 + 2.44413 * q + 1.4281 * q * q + 0.422205 * q * q * q;
|
||||
double b1 = 2.44413 * q + 2.85619 * q * q + 1.26661 * q * q * q;
|
||||
double b2 = -1.4281 * q * q - 1.26661 * q * q * q;
|
||||
double b3 = 0.422205 * q * q * q;
|
||||
double B = 1.0 - (b1 + b2 + b3) / b0;
|
||||
|
||||
b1 /= b0;
|
||||
b2 /= b0;
|
||||
b3 /= b0;
|
||||
|
||||
// From: Bill Triggs, Michael Sdika: Boundary Conditions for Young-van Vliet Recursive Filtering
|
||||
double M[3][3];
|
||||
M[0][0] = -b3 * b1 + 1.0 - b3 * b3 - b2;
|
||||
M[0][1] = (b3 + b1) * (b2 + b3 * b1);
|
||||
M[0][2] = b3 * (b1 + b3 * b2);
|
||||
M[1][0] = b1 + b3 * b2;
|
||||
M[1][1] = -(b2 - 1.0) * (b2 + b3 * b1);
|
||||
M[1][2] = -(b3 * b1 + b3 * b3 + b2 - 1.0) * b3;
|
||||
M[2][0] = b3 * b1 + b2 + b1 * b1 - b2 * b2;
|
||||
M[2][1] = b1 * b2 + b3 * b2 * b2 - b1 * b3 * b3 - b3 * b3 * b3 - b3 * b2 + b3;
|
||||
M[2][2] = b3 * (b1 + b3 * b2);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) {
|
||||
M[i][j] /= (1.0 + b1 - b2 + b3) * (1.0 + b2 + (b1 - b3) * b3);
|
||||
}
|
||||
|
||||
double temp2[W] ALIGNED16;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H; i++) {
|
||||
|
||||
temp2[0] = B * src[i][0] + b1 * src[i][0] + b2 * src[i][0] + b3 * src[i][0];
|
||||
temp2[1] = B * src[i][1] + b1 * temp2[0] + b2 * src[i][0] + b3 * src[i][0];
|
||||
temp2[2] = B * src[i][2] + b1 * temp2[1] + b2 * temp2[0] + b3 * src[i][0];
|
||||
|
||||
for (int j = 3; j < W; j++) {
|
||||
temp2[j] = B * src[i][j] + b1 * temp2[j - 1] + b2 * temp2[j - 2] + b3 * temp2[j - 3];
|
||||
}
|
||||
|
||||
double temp2Wm1 = src[i][W - 1] + M[0][0] * (temp2[W - 1] - src[i][W - 1]) + M[0][1] * (temp2[W - 2] - src[i][W - 1]) + M[0][2] * (temp2[W - 3] - src[i][W - 1]);
|
||||
double temp2W = src[i][W - 1] + M[1][0] * (temp2[W - 1] - src[i][W - 1]) + M[1][1] * (temp2[W - 2] - src[i][W - 1]) + M[1][2] * (temp2[W - 3] - src[i][W - 1]);
|
||||
double temp2Wp1 = src[i][W - 1] + M[2][0] * (temp2[W - 1] - src[i][W - 1]) + M[2][1] * (temp2[W - 2] - src[i][W - 1]) + M[2][2] * (temp2[W - 3] - src[i][W - 1]);
|
||||
|
||||
temp2[W - 1] = temp2Wm1;
|
||||
temp2[W - 2] = B * temp2[W - 2] + b1 * temp2[W - 1] + b2 * temp2W + b3 * temp2Wp1;
|
||||
temp2[W - 3] = B * temp2[W - 3] + b1 * temp2[W - 2] + b2 * temp2[W - 1] + b3 * temp2W;
|
||||
|
||||
for (int j = W - 4; j >= 0; j--) {
|
||||
temp2[j] = B * temp2[j] + b1 * temp2[j + 1] + b2 * temp2[j + 2] + b3 * temp2[j + 3];
|
||||
}
|
||||
|
||||
for (int j = 0; j < W; j++) {
|
||||
dst[i][j] = (T)temp2[j];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
/**
 * Vertical pass of the recursive Gaussian blur (Young / van Vliet filter with
 * the Triggs / Sdika boundary correction), processing four neighbouring
 * columns per iteration with SSE vectors.
 *
 * @param src   row pointers of the input image (H rows, W values per row)
 * @param dst   row pointers of the output image; the caller may pass src here
 * @param W     image width in pixels
 * @param H     image height in pixels; the recursive path reads rows 0..2 and
 *              writes rows H-3..H-1 unconditionally, so it needs H >= 3
 * @param sigma blur radius: below 0.25 the image is only copied, below 0.6 the
 *              3-tap kernel (gaussVertical3Sse) is used, otherwise the
 *              recursive filter runs
 *
 * The `omp for` directives below are orphaned: they share the work between the
 * threads of a parallel region opened by the caller, if there is one.
 * NOTE(review): pixels are moved with LVFU / STVFU, which presumably requires
 * T == float - confirm against helpersse2.h.
 */
template<class T> SSEFUNCTION void gaussVerticalSse (T** src, T** dst, int W, int H, float sigma)
{

    if (sigma < 0.25) {
        // don't perform filtering
        if (src != dst) {
#ifdef _OPENMP
            #pragma omp for
#endif

            for (int i = 0; i < H; i++) {
                memcpy (dst[i], src[i], W * sizeof(T));
            }
        }

        return;
    }

    if (sigma < 0.6) {
        // compute 3x3 kernel
        double c1 = exp (-1.0 / (2.0 * sigma * sigma));
        double csum = 2.0 * c1 + 1.0;
        c1 /= csum;
        double c0 = 1.0 / csum;
        gaussVertical3Sse<T> (src, dst, W, H, c0, c1);
        return;
    }

    // coefficient calculation
    double q = 0.98711 * sigma - 0.96330;

    if (sigma < 2.5) {
        q = 3.97156 - 4.14554 * sqrt (1.0 - 0.26891 * sigma);
    }

    double b0 = 1.57825 + 2.44413 * q + 1.4281 * q * q + 0.422205 * q * q * q;
    double b1 = 2.44413 * q + 2.85619 * q * q + 1.26661 * q * q * q;
    double b2 = -1.4281 * q * q - 1.26661 * q * q * q;
    double b3 = 0.422205 * q * q * q;
    double B = 1.0 - (b1 + b2 + b3) / b0;

    b1 /= b0;
    b2 /= b0;
    b3 /= b0;

    // From: Bill Triggs, Michael Sdika: Boundary Conditions for Young-van Vliet Recursive Filtering
    double M[3][3];
    M[0][0] = -b3 * b1 + 1.0 - b3 * b3 - b2;
    M[0][1] = (b3 + b1) * (b2 + b3 * b1);
    M[0][2] = b3 * (b1 + b3 * b2);
    M[1][0] = b1 + b3 * b2;
    M[1][1] = -(b2 - 1.0) * (b2 + b3 * b1);
    M[1][2] = -(b3 * b1 + b3 * b3 + b2 - 1.0) * b3;
    M[2][0] = b3 * b1 + b2 + b1 * b1 - b2 * b2;
    M[2][1] = b1 * b2 + b3 * b2 * b2 - b1 * b3 * b3 - b3 * b3 * b3 - b3 * b2 + b3;
    M[2][2] = b3 * (b1 + b3 * b2);

    // Both filter passes already apply the gain B = 1 - b1 - b2 - b3, so the
    // matrix is only divided by the two remaining factors of the Triggs/Sdika
    // denominator. This is the same normalisation the scalar gaussVertical uses.
    const double Mnorm = (1.0 + b1 - b2 + b3) * (1.0 + b2 + (b1 - b3) * b3);

    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 3; j++) {
            M[i][j] /= Mnorm;
        }

    // forward-pass result of the group of columns currently being filtered
    float tmp[H][4] ALIGNED16;
    vfloat Rv;
    vfloat Tv, Tm2v, Tm3v;
    vfloat Bv, b1v, b2v, b3v;
    vfloat temp2W, temp2Wp1;
    Bv = F2V(B);
    b1v = F2V(b1);
    b2v = F2V(b2);
    b3v = F2V(b3);

#ifdef _OPENMP
    #pragma omp for
#endif

    for (int i = 0; i < W - 3; i += 4) {
        // forward (top to bottom) pass; the rows above the image are taken to
        // be equal to row 0
        Tv = LVFU( src[0][i]);
        Rv = Tv * (Bv + b1v + b2v + b3v);
        Tm3v = Rv;
        STVF( tmp[0][0], Rv );

        Rv = LVFU(src[1][i]) * Bv + Rv * b1v + Tv * (b2v + b3v);
        Tm2v = Rv;
        STVF( tmp[1][0], Rv );

        Rv = LVFU(src[2][i]) * Bv + Rv * b1v + Tm3v * b2v + Tv * b3v;
        STVF( tmp[2][0], Rv );

        for (int j = 3; j < H; j++) {
            Tv = Rv;
            Rv = LVFU(src[j][i]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
            STVF( tmp[j][0], Rv );
            Tm3v = Tm2v;
            Tm2v = Tv;
        }

        // here Rv, Tm2v, Tm3v hold the forward results of rows H-1, H-2, H-3
        Tv = LVFU(src[H - 1][i]);

        // boundary values for the backward pass (rows H+1, H and H-1)
        temp2Wp1 = Tv + F2V(M[2][0]) * (Rv - Tv) + F2V(M[2][1]) * (Tm2v - Tv) + F2V(M[2][2]) * (Tm3v - Tv);
        temp2W = Tv + F2V(M[1][0]) * (Rv - Tv) + F2V(M[1][1]) * (Tm2v - Tv) + F2V(M[1][2]) * (Tm3v - Tv);

        Rv = Tv + F2V(M[0][0]) * (Rv - Tv) + F2V(M[0][1]) * (Tm2v - Tv) + F2V(M[0][2]) * (Tm3v - Tv);
        STVFU( dst[H - 1][i], Rv );

        Tm2v = Bv * Tm2v + b1v * Rv + b2v * temp2W + b3v * temp2Wp1;
        STVFU( dst[H - 2][i], Tm2v );

        Tm3v = Bv * Tm3v + b1v * Tm2v + b2v * Rv + b3v * temp2W;
        STVFU( dst[H - 3][i], Tm3v );

        // rotate so that Rv / Tm2v / Tm3v are the outputs of rows H-3 / H-2 / H-1
        Tv = Rv;
        Rv = Tm3v;
        Tm3v = Tv;

        // backward (bottom to top) pass
        for (int j = H - 4; j >= 0; j--) {
            Tv = Rv;
            Rv = LVF(tmp[j][0]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
            STVFU( dst[j][i], Rv );
            Tm3v = Tm2v;
            Tm2v = Tv;
        }
    }

    // Borders are done without SSE
#ifdef _OPENMP
    #pragma omp for
#endif

    for (int i = W - (W % 4); i < W; i++) {
        tmp[0][0] = B * src[0][i] + b1 * src[0][i] + b2 * src[0][i] + b3 * src[0][i];
        tmp[1][0] = B * src[1][i] + b1 * tmp[0][0] + b2 * src[0][i] + b3 * src[0][i];
        tmp[2][0] = B * src[2][i] + b1 * tmp[1][0] + b2 * tmp[0][0] + b3 * src[0][i];

        for (int j = 3; j < H; j++) {
            tmp[j][0] = B * src[j][i] + b1 * tmp[j - 1][0] + b2 * tmp[j - 2][0] + b3 * tmp[j - 3][0];
        }

        float temp2Hm1 = src[H - 1][i] + M[0][0] * (tmp[H - 1][0] - src[H - 1][i]) + M[0][1] * (tmp[H - 2][0] - src[H - 1][i]) + M[0][2] * (tmp[H - 3][0] - src[H - 1][i]);
        float temp2H = src[H - 1][i] + M[1][0] * (tmp[H - 1][0] - src[H - 1][i]) + M[1][1] * (tmp[H - 2][0] - src[H - 1][i]) + M[1][2] * (tmp[H - 3][0] - src[H - 1][i]);
        float temp2Hp1 = src[H - 1][i] + M[2][0] * (tmp[H - 1][0] - src[H - 1][i]) + M[2][1] * (tmp[H - 2][0] - src[H - 1][i]) + M[2][2] * (tmp[H - 3][0] - src[H - 1][i]);

        tmp[H - 1][0] = temp2Hm1;
        tmp[H - 2][0] = B * tmp[H - 2][0] + b1 * tmp[H - 1][0] + b2 * temp2H + b3 * temp2Hp1;
        tmp[H - 3][0] = B * tmp[H - 3][0] + b1 * tmp[H - 2][0] + b2 * tmp[H - 1][0] + b3 * temp2H;

        for (int j = H - 4; j >= 0; j--) {
            tmp[j][0] = B * tmp[j][0] + b1 * tmp[j + 1][0] + b2 * tmp[j + 2][0] + b3 * tmp[j + 3][0];
        }

        for (int j = 0; j < H; j++) {
            dst[j][i] = tmp[j][0];
        }
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
template<class T> void gaussVertical (T** src, T** dst, int W, int H, double sigma)
|
||||
{
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
if (sigma < 70) { // bigger sigma only with double precision
|
||||
gaussVerticalSse<T> (src, dst, W, H, sigma);
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (sigma < 0.25) {
|
||||
// don't perform filtering
|
||||
if (src != dst)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i = 0; i < H; i++) {
|
||||
memcpy (dst[i], src[i], W * sizeof(T));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (sigma < 0.6) {
|
||||
// compute 3x3 kernel
|
||||
double c1 = exp (-1.0 / (2.0 * sigma * sigma));
|
||||
double csum = 2.0 * c1 + 1.0;
|
||||
c1 /= csum;
|
||||
double c0 = 1.0 / csum;
|
||||
gaussVertical3<T> (src, dst, W, H, c0, c1);
|
||||
return;
|
||||
}
|
||||
|
||||
// coefficient calculation
|
||||
double q = 0.98711 * sigma - 0.96330;
|
||||
|
||||
if (sigma < 2.5) {
|
||||
q = 3.97156 - 4.14554 * sqrt (1.0 - 0.26891 * sigma);
|
||||
}
|
||||
|
||||
double b0 = 1.57825 + 2.44413 * q + 1.4281 * q * q + 0.422205 * q * q * q;
|
||||
double b1 = 2.44413 * q + 2.85619 * q * q + 1.26661 * q * q * q;
|
||||
double b2 = -1.4281 * q * q - 1.26661 * q * q * q;
|
||||
double b3 = 0.422205 * q * q * q;
|
||||
double B = 1.0 - (b1 + b2 + b3) / b0;
|
||||
|
||||
b1 /= b0;
|
||||
b2 /= b0;
|
||||
b3 /= b0;
|
||||
|
||||
// From: Bill Triggs, Michael Sdika: Boundary Conditions for Young-van Vliet Recursive Filtering
|
||||
double M[3][3];
|
||||
M[0][0] = -b3 * b1 + 1.0 - b3 * b3 - b2;
|
||||
M[0][1] = (b3 + b1) * (b2 + b3 * b1);
|
||||
M[0][2] = b3 * (b1 + b3 * b2);
|
||||
M[1][0] = b1 + b3 * b2;
|
||||
M[1][1] = -(b2 - 1.0) * (b2 + b3 * b1);
|
||||
M[1][2] = -(b3 * b1 + b3 * b3 + b2 - 1.0) * b3;
|
||||
M[2][0] = b3 * b1 + b2 + b1 * b1 - b2 * b2;
|
||||
M[2][1] = b1 * b2 + b3 * b2 * b2 - b1 * b3 * b3 - b3 * b3 * b3 - b3 * b2 + b3;
|
||||
M[2][2] = b3 * (b1 + b3 * b2);
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++) {
|
||||
M[i][j] /= (1.0 + b1 - b2 + b3) * (1.0 + b2 + (b1 - b3) * b3);
|
||||
}
|
||||
|
||||
// process 'numcols' columns for better usage of L1 cpu cache (especially faster for large values of H)
|
||||
static const int numcols = 8;
|
||||
double temp2[H][numcols] ALIGNED16;
|
||||
double temp2Hm1[numcols], temp2H[numcols], temp2Hp1[numcols];
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < W - numcols + 1; i += numcols) {
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
temp2[0][k] = B * src[0][i + k] + b1 * src[0][i + k] + b2 * src[0][i + k] + b3 * src[0][i + k];
|
||||
temp2[1][k] = B * src[1][i + k] + b1 * temp2[0][k] + b2 * src[0][i + k] + b3 * src[0][i + k];
|
||||
temp2[2][k] = B * src[2][i + k] + b1 * temp2[1][k] + b2 * temp2[0][k] + b3 * src[0][i + k];
|
||||
}
|
||||
|
||||
for (int j = 3; j < H; j++) {
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
temp2[j][k] = B * src[j][i + k] + b1 * temp2[j - 1][k] + b2 * temp2[j - 2][k] + b3 * temp2[j - 3][k];
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
temp2Hm1[k] = src[H - 1][i + k] + M[0][0] * (temp2[H - 1][k] - src[H - 1][i + k]) + M[0][1] * (temp2[H - 2][k] - src[H - 1][i + k]) + M[0][2] * (temp2[H - 3][k] - src[H - 1][i + k]);
|
||||
temp2H[k] = src[H - 1][i + k] + M[1][0] * (temp2[H - 1][k] - src[H - 1][i + k]) + M[1][1] * (temp2[H - 2][k] - src[H - 1][i + k]) + M[1][2] * (temp2[H - 3][k] - src[H - 1][i + k]);
|
||||
temp2Hp1[k] = src[H - 1][i + k] + M[2][0] * (temp2[H - 1][k] - src[H - 1][i + k]) + M[2][1] * (temp2[H - 2][k] - src[H - 1][i + k]) + M[2][2] * (temp2[H - 3][k] - src[H - 1][i + k]);
|
||||
}
|
||||
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
dst[H - 1][i + k] = temp2[H - 1][k] = temp2Hm1[k];
|
||||
dst[H - 2][i + k] = temp2[H - 2][k] = B * temp2[H - 2][k] + b1 * temp2[H - 1][k] + b2 * temp2H[k] + b3 * temp2Hp1[k];
|
||||
dst[H - 3][i + k] = temp2[H - 3][k] = B * temp2[H - 3][k] + b1 * temp2[H - 2][k] + b2 * temp2[H - 1][k] + b3 * temp2H[k];
|
||||
}
|
||||
|
||||
for (int j = H - 4; j >= 0; j--) {
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
dst[j][i + k] = temp2[j][k] = B * temp2[j][k] + b1 * temp2[j + 1][k] + b2 * temp2[j + 2][k] + b3 * temp2[j + 3][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
|
||||
// process remaining column
|
||||
for (int i = W - (W % numcols); i < W; i++) {
|
||||
temp2[0][0] = B * src[0][i] + b1 * src[0][i] + b2 * src[0][i] + b3 * src[0][i];
|
||||
temp2[1][0] = B * src[1][i] + b1 * temp2[0][0] + b2 * src[0][i] + b3 * src[0][i];
|
||||
temp2[2][0] = B * src[2][i] + b1 * temp2[1][0] + b2 * temp2[0][0] + b3 * src[0][i];
|
||||
|
||||
for (int j = 3; j < H; j++) {
|
||||
temp2[j][0] = B * src[j][i] + b1 * temp2[j - 1][0] + b2 * temp2[j - 2][0] + b3 * temp2[j - 3][0];
|
||||
}
|
||||
|
||||
double temp2Hm1 = src[H - 1][i] + M[0][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[0][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[0][2] * (temp2[H - 3][0] - src[H - 1][i]);
|
||||
double temp2H = src[H - 1][i] + M[1][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[1][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[1][2] * (temp2[H - 3][0] - src[H - 1][i]);
|
||||
double temp2Hp1 = src[H - 1][i] + M[2][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[2][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[2][2] * (temp2[H - 3][0] - src[H - 1][i]);
|
||||
|
||||
dst[H - 1][i] = temp2[H - 1][0] = temp2Hm1;
|
||||
dst[H - 2][i] = temp2[H - 2][0] = B * temp2[H - 2][0] + b1 * temp2[H - 1][0] + b2 * temp2H + b3 * temp2Hp1;
|
||||
dst[H - 3][i] = temp2[H - 3][0] = B * temp2[H - 3][0] + b1 * temp2[H - 2][0] + b2 * temp2[H - 1][0] + b3 * temp2H;
|
||||
|
||||
for (int j = H - 4; j >= 0; j--) {
|
||||
dst[j][i] = temp2[j][0] = B * temp2[j][0] + b1 * temp2[j + 1][0] + b2 * temp2[j + 2][0] + b3 * temp2[j + 3][0];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T> void gaussianBlur(T** src, T** dst, const int W, const int H, const double sigma, T *buffer = NULL)
|
||||
{
|
||||
|
||||
if(buffer) { // use iterated boxblur to approximate gaussian blur
|
||||
// Compute ideal averaging filter width and number of iterations
|
||||
int n = 1;
|
||||
double wIdeal = sqrt((12 * sigma * sigma) + 1);
|
||||
|
||||
while(wIdeal > W || wIdeal > H) {
|
||||
n++;
|
||||
wIdeal = sqrt((12 * sigma * sigma / n) + 1);
|
||||
}
|
||||
|
||||
if(n < 3) {
|
||||
n = 3;
|
||||
wIdeal = sqrt((12 * sigma * sigma / n) + 1);
|
||||
} else if(n > 6) {
|
||||
n = 6;
|
||||
}
|
||||
|
||||
int wl = wIdeal;
|
||||
|
||||
if(wl % 2 == 0) {
|
||||
wl--;
|
||||
}
|
||||
|
||||
int wu = wl + 2;
|
||||
|
||||
double mIdeal = (12 * sigma * sigma - n * wl * wl - 4 * n * wl - 3 * n) / (-4 * wl - 4);
|
||||
int m = round(mIdeal);
|
||||
|
||||
int sizes[n];
|
||||
|
||||
for(int i = 0; i < n; i++) {
|
||||
sizes[i] = ((i < m ? wl : wu) - 1) / 2;
|
||||
}
|
||||
|
||||
rtengine::boxblur(src, dst, buffer, sizes[0], sizes[0], W, H);
|
||||
|
||||
for(int i = 1; i < n; i++) {
|
||||
rtengine::boxblur(dst, dst, buffer, sizes[i], sizes[i], W, H);
|
||||
}
|
||||
|
||||
} else {
|
||||
gaussHorizontal<T> (src, dst, W, H, sigma);
|
||||
gaussVertical<T> (dst, dst, W, H, sigma);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
@ -115,21 +115,30 @@ static INLINE vfloat vcast_vf_f(float f)
|
||||
return _mm_set_ps(f, f, f, f);
|
||||
}
|
||||
|
||||
// Don't use intrinsics here. Newer gcc versions (>= 4.9, maybe also before 4.9) generate better code when not using intrinsics
|
||||
// example: vaddf(vmulf(a,b),c) will generate an FMA instruction when build for chips with that feature only when vaddf and vmulf don't use intrinsics
|
||||
// Lane-wise sum of two vectors. Written with the plain operator (no intrinsic)
// so the compiler is free to fuse it with a neighbouring multiplication.
static INLINE vfloat vaddf(vfloat x, vfloat y)
{
    return x + y;
}
|
||||
// Lane-wise difference x - y, written with the plain operator instead of an intrinsic.
static INLINE vfloat vsubf(vfloat x, vfloat y)
{
    return x - y;
}
|
||||
// Lane-wise product of two vectors, written with the plain operator instead of an intrinsic.
static INLINE vfloat vmulf(vfloat x, vfloat y)
{
    return x * y;
}
|
||||
// Lane-wise quotient x / y, written with the plain operator instead of an intrinsic.
static INLINE vfloat vdivf(vfloat x, vfloat y)
{
    return x / y;
}
|
||||
// Also don't use intrinsic here: Some chips support FMA instructions with 3 and 4 operands
|
||||
// 3 operands: a = a*b+c, b = a*b+c, c = a*b+c // destination has to be one of a,b,c
|
||||
// 4 operands: d = a*b+c // destination does not have to be one of a,b,c
|
||||
// gcc will use the one which fits best when not using intrinsics. With using intrinsics that's not possible
|
||||
// Lane-wise multiply-add: x * y + z, kept as one operator expression (no intrinsic).
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z)
{
    return x * y + z;
}
|
||||
static INLINE vfloat vrecf(vfloat x)
|
||||
{
|
||||
|
@ -73,6 +73,7 @@ class ImProcFunctions
|
||||
void transformLuminanceOnly (Imagefloat* original, Imagefloat* transformed, int cx, int cy, int oW, int oH, int fW, int fH);
|
||||
void transformHighQuality (Imagefloat* original, Imagefloat* transformed, int cx, int cy, int sx, int sy, int oW, int oH, int fW, int fH, const LCPMapper *pLCPMap, bool fullImage);
|
||||
|
||||
void sharpenHaloCtrl (float** luminance, float** blurmap, float** base, int W, int H, const SharpeningParams &sharpenParam);
|
||||
void sharpenHaloCtrl (LabImage* lab, float** blurmap, float** base, int W, int H, SharpeningParams &sharpenParam);
|
||||
void sharpenHaloCtrlcam (CieImage* ncie, float** blurmap, float** base, int W, int H);
|
||||
void firstAnalysisThread(Imagefloat* original, Glib::ustring wprofile, unsigned int* histogram, int row_from, int row_to);
|
||||
@ -271,9 +272,9 @@ public:
|
||||
void Lanczos (const LabImage* src, LabImage* dst, float scale);
|
||||
void Lanczos (const Image16* src, Image16* dst, float scale);
|
||||
|
||||
void deconvsharpening (LabImage* lab, float** buffer, SharpeningParams &sharpenParam);
|
||||
void deconvsharpeningcam (CieImage* ncie, float** buffer);
|
||||
void deconvsharpening (float** luminance, float** buffer, int W, int H, const SharpeningParams &sharpenParam);
|
||||
void MLsharpen (LabImage* lab);// Manuel's clarity / sharpening
|
||||
void MLmicrocontrast(float** luminance, int W, int H ); //Manuel's microcontrast
|
||||
void MLmicrocontrast(LabImage* lab ); //Manuel's microcontrast
|
||||
void MLmicrocontrastcam(CieImage* ncie ); //Manuel's microcontrast
|
||||
|
||||
|
@ -32,7 +32,6 @@ namespace rtengine
|
||||
|
||||
SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
{
|
||||
|
||||
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
// impulse noise removal
|
||||
// local variables
|
||||
@ -41,15 +40,15 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
int height = lab->H;
|
||||
|
||||
// buffer for the lowpass image
|
||||
float ** lpf = new float *[height];
|
||||
float * lpf[height] ALIGNED16;
|
||||
lpf[0] = new float [width * height];
|
||||
// buffer for the highpass image
|
||||
float ** impish = new float *[height];
|
||||
char * impish[height] ALIGNED16;
|
||||
impish[0] = new char [width * height];
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
lpf[i] = new float [width];
|
||||
//memset (lpf[i], 0, width*sizeof(float));
|
||||
impish[i] = new float [width];
|
||||
//memset (impish[i], 0, width*sizeof(unsigned short));
|
||||
for (int i = 1; i < height; i++) {
|
||||
lpf[i] = lpf[i - 1] + width;
|
||||
impish[i] = impish[i - 1] + width;
|
||||
}
|
||||
|
||||
|
||||
@ -60,12 +59,11 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
|
||||
const float eps = 1.0;
|
||||
|
||||
//rangeblur<unsigned short, unsigned int> (lab->L, lpf, impish /*used as buffer here*/, width, height, thresh, false);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (lab->L, lpf, width, height, max(2.0, thresh - 1.0));
|
||||
gaussianBlur (lab->L, lpf, width, height, max(2.0, thresh - 1.0));
|
||||
}
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@ -81,9 +79,9 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
int i1, j1, j;
|
||||
float hpfabs, hfnbrave;
|
||||
#ifdef __SSE2__
|
||||
__m128 hfnbravev, hpfabsv;
|
||||
__m128 impthrDiv24v = _mm_set1_ps( impthrDiv24 );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
vfloat hfnbravev, hpfabsv;
|
||||
vfloat impthrDiv24v = F2V( impthrDiv24 );
|
||||
vfloat onev = F2V( 1.0f );
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -105,46 +103,37 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
#ifdef __SSE2__
|
||||
|
||||
for (; j < width - 5; j += 4) {
|
||||
hfnbravev = _mm_setzero_ps( );
|
||||
hfnbravev = ZEROV;
|
||||
hpfabsv = vabsf(LVFU(lab->L[i][j]) - LVFU(lpf[i][j]));
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) {
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++) {
|
||||
hfnbravev += vabsf(LVFU(lab->L[i1][j1]) - LVFU(lpf[i1][j1]));
|
||||
}
|
||||
}
|
||||
|
||||
_mm_storeu_ps(&impish[i][j], vself(vmaskf_gt(hpfabsv, (hfnbravev - hpfabsv)*impthrDiv24v), onev, _mm_setzero_ps()));
|
||||
}
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++) {
|
||||
hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++) {
|
||||
hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
int mask = _mm_movemask_ps((hfnbravev - hpfabsv) * impthrDiv24v - hpfabsv);
|
||||
impish[i][j] = (mask & 1);
|
||||
impish[i][j + 1] = ((mask & 2) >> 1);
|
||||
impish[i][j + 2] = ((mask & 4) >> 2);
|
||||
impish[i][j + 3] = ((mask & 8) >> 3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++) {
|
||||
hfnbrave += fabs(lab->L[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
hpfabs = fabs(lab->L[i][j] - lpf[i][j]);
|
||||
|
||||
@ -188,10 +177,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = 0; j1 <= j + 2; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -220,10 +205,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -252,10 +233,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 < width; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -277,13 +254,8 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||
}
|
||||
//now impulsive values have been corrected
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
delete [] lpf[i];
|
||||
delete [] impish[i];
|
||||
}
|
||||
|
||||
delete [] lpf;
|
||||
delete [] impish;
|
||||
delete [] lpf[0];
|
||||
delete [] impish[0];
|
||||
|
||||
}
|
||||
|
||||
@ -317,7 +289,7 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (ncie->sh_p, lpf, width, height, max(2.0, thresh - 1.0));
|
||||
gaussianBlur (ncie->sh_p, lpf, width, height, max(2.0, thresh - 1.0));
|
||||
}
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@ -332,9 +304,9 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
int i1, j1, j;
|
||||
float hpfabs, hfnbrave;
|
||||
#ifdef __SSE2__
|
||||
__m128 hfnbravev, hpfabsv;
|
||||
__m128 impthrDiv24v = _mm_set1_ps( impthrDiv24 );
|
||||
__m128 onev = _mm_set1_ps( 1.0f );
|
||||
vfloat hfnbravev, hpfabsv;
|
||||
vfloat impthrDiv24v = F2V( impthrDiv24 );
|
||||
vfloat onev = F2V( 1.0f );
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@ -357,7 +329,7 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
|
||||
for (; j < width - 5; j += 4) {
|
||||
hpfabsv = vabsf(LVFU(ncie->sh_p[i][j]) - LVFU(lpf[i][j]));
|
||||
hfnbravev = _mm_setzero_ps();
|
||||
hfnbravev = ZEROV;
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ ) {
|
||||
@ -365,38 +337,25 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
hfnbravev += vabsf(LVFU(ncie->sh_p[i1][j1]) - LVFU(lpf[i1][j1]));
|
||||
}
|
||||
|
||||
_mm_storeu_ps(&impish[i][j], vself(vmaskf_gt(hpfabsv, (hfnbravev - hpfabsv)*impthrDiv24v), onev, _mm_setzero_ps()));
|
||||
}
|
||||
}
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
STVFU(impish[i][j], vselfzero(vmaskf_gt(hpfabsv, (hfnbravev - hpfabsv)*impthrDiv24v), onev));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for (; j < width - 2; j++) {
|
||||
hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
|
||||
|
||||
//block average of high pass data
|
||||
for (i1 = max(0, i - 2), hfnbrave = 0; i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
hfnbrave += fabs(ncie->sh_p[i1][j1] - lpf[i1][j1]);
|
||||
}
|
||||
|
||||
impish[i][j] = (hpfabs > ((hfnbrave - hpfabs) * impthrDiv24));
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
hpfabs = fabs(ncie->sh_p[i][j] - lpf[i][j]);
|
||||
|
||||
@ -422,42 +381,34 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int j;
|
||||
float2 sincosval;
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat2 sincosvalv;
|
||||
__m128 piidv = _mm_set1_ps( piid );
|
||||
__m128 tempv;
|
||||
vfloat piidv = F2V( piid );
|
||||
vfloat tempv;
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < height; i++) {
|
||||
int j = 0;
|
||||
#ifdef __SSE2__
|
||||
|
||||
for (j = 0; j < width - 3; j += 4) {
|
||||
for (; j < width - 3; j += 4) {
|
||||
sincosvalv = xsincosf(piidv * LVFU(ncie->h_p[i][j]));
|
||||
tempv = LVFU(ncie->C_p[i][j]);
|
||||
_mm_storeu_ps(&sraa[i][j], tempv * sincosvalv.y);
|
||||
_mm_storeu_ps(&srbb[i][j], tempv * sincosvalv.x);
|
||||
}
|
||||
|
||||
for (; j < width; j++) {
|
||||
sincosval = xsincosf(piid * ncie->h_p[i][j]);
|
||||
sraa[i][j] = ncie->C_p[i][j] * sincosval.y;
|
||||
srbb[i][j] = ncie->C_p[i][j] * sincosval.x;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
sincosval = xsincosf(piid * ncie->h_p[i][j]);
|
||||
sraa[i][j] = ncie->C_p[i][j] * sincosval.y;
|
||||
srbb[i][j] = ncie->C_p[i][j] * sincosval.x;
|
||||
STVFU(sraa[i][j], tempv * sincosvalv.y);
|
||||
STVFU(srbb[i][j], tempv * sincosvalv.x);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for (; j < width; j++) {
|
||||
float2 sincosval = xsincosf(piid * ncie->h_p[i][j]);
|
||||
sraa[i][j] = ncie->C_p[i][j] * sincosval.y;
|
||||
srbb[i][j] = ncie->C_p[i][j] * sincosval.x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -488,10 +439,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = 0; j1 <= j + 2; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -520,10 +467,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 <= j + 2; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -552,10 +495,6 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
|
||||
for (i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
|
||||
for (j1 = j - 2; j1 < width; j1++ ) {
|
||||
if (i1 == i && j1 == j) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (impish[i1][j1]) {
|
||||
continue;
|
||||
}
|
||||
@ -583,41 +522,32 @@ SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh,
|
||||
#endif
|
||||
{
|
||||
#ifdef __SSE2__
|
||||
__m128 interav, interbv;
|
||||
__m128 piidv = _mm_set1_ps(piid);
|
||||
vfloat interav, interbv;
|
||||
vfloat piidv = F2V(piid);
|
||||
#endif // __SSE2__
|
||||
int j;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for(int i = 0; i < height; i++ ) {
|
||||
int j = 0;
|
||||
#ifdef __SSE2__
|
||||
|
||||
for(j = 0; j < width - 3; j += 4) {
|
||||
for(; j < width - 3; j += 4) {
|
||||
interav = LVFU(sraa[i][j]);
|
||||
interbv = LVFU(srbb[i][j]);
|
||||
_mm_storeu_ps(&ncie->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
|
||||
_mm_storeu_ps(&ncie->C_p[i][j], _mm_sqrt_ps(SQRV(interbv) + SQRV(interav)));
|
||||
STVFU(ncie->h_p[i][j], (xatan2f(interbv, interav)) / piidv);
|
||||
STVFU(ncie->C_p[i][j], vsqrtf(SQRV(interbv) + SQRV(interav)));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for(; j < width; j++) {
|
||||
float intera = sraa[i][j];
|
||||
float interb = srbb[i][j];
|
||||
ncie->h_p[i][j] = (xatan2f(interb, intera)) / piid;
|
||||
ncie->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for(j = 0; j < width; j++) {
|
||||
float intera = sraa[i][j];
|
||||
float interb = srbb[i][j];
|
||||
ncie->h_p[i][j] = (xatan2f(interb, intera)) / piid;
|
||||
ncie->C_p[i][j] = sqrt(SQR(interb) + SQR(intera));
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
* adaptation to RawTherapee
|
||||
* 2015 Jacques Desmis <jdesmis@gmail.com>
|
||||
* 2015 Ingo Weyrich <heckflosse@i-weyrich.de>
|
||||
* 2015 Ingo Weyrich <heckflosse67@gmx.de>
|
||||
|
||||
* D. J. Jobson, Z. Rahman, and G. A. Woodell. A multi-scale
|
||||
* Retinex for bridging the gap between color images and the
|
||||
@ -235,17 +235,19 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
bool higplus = false ;
|
||||
float elogt;
|
||||
float hl = deh.baselog;
|
||||
|
||||
if(hl >= 2.71828f) {
|
||||
elogt = 2.71828f + SQR(SQR(hl - 2.71828f));
|
||||
} else {
|
||||
elogt = hl;
|
||||
}
|
||||
|
||||
int H_L = height;
|
||||
int W_L = width;
|
||||
|
||||
float *tran[H_L] ALIGNED16;
|
||||
float *tranBuffer;
|
||||
int viewmet=0;
|
||||
int viewmet = 0;
|
||||
|
||||
elogt = 2.71828f;//disabled baselog
|
||||
FlatCurve* shcurve = NULL;//curve L=f(H)
|
||||
@ -280,116 +282,121 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
if (deh.retinexMethod == "highli" || deh.retinexMethod == "highliplus") {
|
||||
moderetinex = 3;
|
||||
}
|
||||
for(int it=1; it<iter+1; it++) {//iter nb max of iterations
|
||||
|
||||
for(int it = 1; it < iter + 1; it++) { //iter nb max of iterations
|
||||
float aahi = 49.f / 99.f; ////reduce sensibility 50%
|
||||
float bbhi = 1.f - aahi;
|
||||
float high;
|
||||
high = bbhi + aahi * (float) deh.highl;
|
||||
|
||||
float grads;
|
||||
float grad=1.f;
|
||||
float grad = 1.f;
|
||||
float sc = 3.f;
|
||||
if(gradient==0) {
|
||||
grad=1.f;
|
||||
sc=3.f;
|
||||
}
|
||||
else if(gradient==1) {
|
||||
grad=0.25f*it+0.75f;
|
||||
sc=-0.5f*it+4.5f;
|
||||
}
|
||||
else if(gradient==2) {
|
||||
grad=0.5f*it+0.5f;
|
||||
sc=-0.75f*it+5.75f;
|
||||
}
|
||||
else if(gradient==3) {
|
||||
grad=0.666f*it+0.333f;
|
||||
sc=-0.75f*it+5.75f;
|
||||
}
|
||||
else if(gradient==4) {
|
||||
grad=0.8f*it+0.2f;
|
||||
sc=-0.75f*it+5.75f;
|
||||
}
|
||||
else if(gradient==5) {
|
||||
if(moderetinex!=3) {
|
||||
grad=2.5f*it-1.5f;
|
||||
|
||||
if(gradient == 0) {
|
||||
grad = 1.f;
|
||||
sc = 3.f;
|
||||
} else if(gradient == 1) {
|
||||
grad = 0.25f * it + 0.75f;
|
||||
sc = -0.5f * it + 4.5f;
|
||||
} else if(gradient == 2) {
|
||||
grad = 0.5f * it + 0.5f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 3) {
|
||||
grad = 0.666f * it + 0.333f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 4) {
|
||||
grad = 0.8f * it + 0.2f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 5) {
|
||||
if(moderetinex != 3) {
|
||||
grad = 2.5f * it - 1.5f;
|
||||
} else {
|
||||
float aa = (11.f * high - 1.f) / 4.f;
|
||||
float bb = 1.f - aa;
|
||||
grad = aa * it + bb;
|
||||
}
|
||||
else {
|
||||
float aa=(11.f*high-1.f)/4.f;
|
||||
float bb=1.f-aa;
|
||||
grad=aa*it+bb;
|
||||
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 6) {
|
||||
if(moderetinex != 3) {
|
||||
grad = 5.f * it - 4.f;
|
||||
} else {
|
||||
float aa = (21.f * high - 1.f) / 4.f;
|
||||
float bb = 1.f - aa;
|
||||
grad = aa * it + bb;
|
||||
}
|
||||
sc=-0.75f*it+5.75f;
|
||||
}
|
||||
else if(gradient==6) {
|
||||
if(moderetinex!=3) {
|
||||
grad=5.f*it-4.f;
|
||||
}
|
||||
else {
|
||||
float aa=(21.f*high-1.f)/4.f;
|
||||
float bb=1.f-aa;
|
||||
grad=aa*it+bb;
|
||||
}
|
||||
sc=-0.75f*it+5.75f;
|
||||
|
||||
sc = -0.75f * it + 5.75f;
|
||||
}
|
||||
|
||||
else if(gradient==-1) {
|
||||
grad=-0.125f*it+1.125f;
|
||||
sc=3.f;
|
||||
else if(gradient == -1) {
|
||||
grad = -0.125f * it + 1.125f;
|
||||
sc = 3.f;
|
||||
}
|
||||
|
||||
float varx;
|
||||
float limdx, ilimdx;
|
||||
if(gradvart!=0) {
|
||||
if(gradvart==1) {
|
||||
varx=vart*(-0.125f*it+1.125f);
|
||||
limdx=limD*(-0.125f*it+1.125f);
|
||||
ilimdx=1.f/limdx;
|
||||
}
|
||||
else if(gradvart==2) {
|
||||
varx=vart*(-0.2f*it+1.2f);
|
||||
limdx=limD*(-0.2f*it+1.2f);
|
||||
ilimdx=1.f/limdx;
|
||||
}
|
||||
else if(gradvart==-1) {
|
||||
varx=vart*(0.125f*it+0.875f);
|
||||
limdx=limD*(0.125f*it+0.875f);
|
||||
ilimdx=1.f/limdx;
|
||||
}
|
||||
else if(gradvart==-2) {
|
||||
varx=vart*(0.4f*it+0.6f);
|
||||
limdx=limD*(0.4f*it+0.6f);
|
||||
ilimdx=1.f/limdx;
|
||||
|
||||
if(gradvart != 0) {
|
||||
if(gradvart == 1) {
|
||||
varx = vart * (-0.125f * it + 1.125f);
|
||||
limdx = limD * (-0.125f * it + 1.125f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == 2) {
|
||||
varx = vart * (-0.2f * it + 1.2f);
|
||||
limdx = limD * (-0.2f * it + 1.2f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == -1) {
|
||||
varx = vart * (0.125f * it + 0.875f);
|
||||
limdx = limD * (0.125f * it + 0.875f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == -2) {
|
||||
varx = vart * (0.4f * it + 0.6f);
|
||||
limdx = limD * (0.4f * it + 0.6f);
|
||||
ilimdx = 1.f / limdx;
|
||||
}
|
||||
} else {
|
||||
varx = vart;
|
||||
limdx = limD;
|
||||
ilimdx = ilimD;
|
||||
}
|
||||
else {
|
||||
varx=vart;
|
||||
limdx=limD;
|
||||
ilimdx=ilimD;
|
||||
}
|
||||
scal=round(sc);
|
||||
|
||||
scal = round(sc);
|
||||
float strengthx;
|
||||
float ks=1.f;
|
||||
float ks = 1.f;
|
||||
|
||||
if(gradstr!=0) {
|
||||
if(gradstr==1) {
|
||||
if(it <= 3) ks=-0.3f*it+1.6f;
|
||||
else ks = 0.5f;
|
||||
}
|
||||
else if(gradstr==2) {
|
||||
if(it <= 3) ks=-0.6f*it+2.2f;
|
||||
else ks = 0.3f;
|
||||
}
|
||||
else if(gradstr==-1) {
|
||||
if(it <= 3) ks=0.2f*it+0.6f;
|
||||
else ks = 1.2f;
|
||||
}
|
||||
else if(gradstr==-2) {
|
||||
if(it <= 3) ks=0.4f*it+0.2f;
|
||||
else ks = 1.5f;
|
||||
if(gradstr != 0) {
|
||||
if(gradstr == 1) {
|
||||
if(it <= 3) {
|
||||
ks = -0.3f * it + 1.6f;
|
||||
} else {
|
||||
ks = 0.5f;
|
||||
}
|
||||
} else if(gradstr == 2) {
|
||||
if(it <= 3) {
|
||||
ks = -0.6f * it + 2.2f;
|
||||
} else {
|
||||
ks = 0.3f;
|
||||
}
|
||||
} else if(gradstr == -1) {
|
||||
if(it <= 3) {
|
||||
ks = 0.2f * it + 0.6f;
|
||||
} else {
|
||||
ks = 1.2f;
|
||||
}
|
||||
} else if(gradstr == -2) {
|
||||
if(it <= 3) {
|
||||
ks = 0.4f * it + 0.2f;
|
||||
} else {
|
||||
ks = 1.5f;
|
||||
}
|
||||
}
|
||||
}
|
||||
strengthx=ks*strength;
|
||||
|
||||
retinex_scales( RetinexScales, scal, moderetinex, nei/grad, high );
|
||||
strengthx = ks * strength;
|
||||
|
||||
retinex_scales( RetinexScales, scal, moderetinex, nei / grad, high );
|
||||
|
||||
float *src[H_L] ALIGNED16;
|
||||
float *srcBuffer = new float[H_L * W_L];
|
||||
@ -402,17 +409,41 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
int shHighlights = deh.highlights;
|
||||
int shShadows = deh.shadows;
|
||||
int mapmet=0;
|
||||
if(deh.mapMethod=="map") mapmet=2;
|
||||
if(deh.mapMethod=="mapT") mapmet=3;
|
||||
if(deh.mapMethod=="curv") mapmet=1;
|
||||
if(deh.mapMethod=="gaus") mapmet=4;
|
||||
int mapmet = 0;
|
||||
|
||||
if(deh.mapMethod == "map") {
|
||||
mapmet = 2;
|
||||
}
|
||||
|
||||
if(deh.mapMethod == "mapT") {
|
||||
mapmet = 3;
|
||||
}
|
||||
|
||||
if(deh.mapMethod == "curv") {
|
||||
mapmet = 1;
|
||||
}
|
||||
|
||||
if(deh.mapMethod == "gaus") {
|
||||
mapmet = 4;
|
||||
}
|
||||
|
||||
double shradius = (double) deh.radius;
|
||||
|
||||
if(deh.viewMethod=="mask") viewmet=1;
|
||||
if(deh.viewMethod=="tran") viewmet=2;
|
||||
if(deh.viewMethod=="tran2") viewmet=3;
|
||||
if(deh.viewMethod=="unsharp") viewmet=4;
|
||||
if(deh.viewMethod == "mask") {
|
||||
viewmet = 1;
|
||||
}
|
||||
|
||||
if(deh.viewMethod == "tran") {
|
||||
viewmet = 2;
|
||||
}
|
||||
|
||||
if(deh.viewMethod == "tran2") {
|
||||
viewmet = 3;
|
||||
}
|
||||
|
||||
if(deh.viewMethod == "unsharp") {
|
||||
viewmet = 4;
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
@ -431,13 +462,14 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
out[i] = &outBuffer[i * W_L];
|
||||
}
|
||||
|
||||
if(viewmet==3 || viewmet==2) {
|
||||
if(viewmet == 3 || viewmet == 2) {
|
||||
tranBuffer = new float[H_L * W_L];
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
tran[i] = &tranBuffer[i * W_L];
|
||||
}
|
||||
}
|
||||
|
||||
const float logBetaGain = xlogf(16384.f);
|
||||
float pond = logBetaGain / (float) scal;
|
||||
|
||||
@ -456,29 +488,36 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
{
|
||||
for ( int scale = scal - 1; scale >= 0; scale-- ) {
|
||||
if(scale == scal - 1) {
|
||||
gaussianBlur<float> (src, out, W_L, H_L, RetinexScales[scale], buffer);
|
||||
} else {
|
||||
// reuse result of last iteration
|
||||
gaussianBlur<float> (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer);
|
||||
gaussianBlur (src, out, W_L, H_L, RetinexScales[scale], buffer);
|
||||
} else { // reuse result of last iteration
|
||||
gaussianBlur (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer);
|
||||
}
|
||||
//printf("scal=%d RetinexScales=%f\n",scale, RetinexScales[scale]);
|
||||
printf("..");
|
||||
|
||||
|
||||
if(mapmet==4) shradius /= 1.;
|
||||
else shradius = 40.;
|
||||
if(mapmet == 4) {
|
||||
shradius /= 1.;
|
||||
} else {
|
||||
shradius = 40.;
|
||||
}
|
||||
|
||||
// if(shHighlights > 0 || shShadows > 0) {
|
||||
if(mapmet==3) if(it==1) shmap->updateL (out, shradius, true, 1);//wav Total
|
||||
if(mapmet==2 && scale >2) if(it==1) shmap->updateL (out, shradius, true, 1);//wav partial
|
||||
if(mapmet==4) if(it==1) shmap->updateL (out, shradius, false, 1);//gauss
|
||||
if(mapmet == 3) if(it == 1) {
|
||||
shmap->updateL (out, shradius, true, 1); //wav Total
|
||||
}
|
||||
|
||||
if(mapmet == 2 && scale > 2) if(it == 1) {
|
||||
shmap->updateL (out, shradius, true, 1); //wav partial
|
||||
}
|
||||
|
||||
if(mapmet == 4) if(it == 1) {
|
||||
shmap->updateL (out, shradius, false, 1); //gauss
|
||||
}
|
||||
|
||||
// }
|
||||
if (shmap) {
|
||||
h_th = shmap->max_f - deh.htonalwidth * (shmap->max_f - shmap->avg) / 100;
|
||||
s_th = deh.stonalwidth * (shmap->avg - shmap->min_f) / 100;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat pondv = F2V(pond);
|
||||
vfloat limMinv = F2V(ilimdx);
|
||||
@ -490,11 +529,15 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
if(mapcontlutili) {
|
||||
int j = 0;
|
||||
|
||||
for (; j < W_L; j++) {
|
||||
if(it==1) out[i][j] = mapcurve[2.f * out[i][j]] / 2.f;
|
||||
if(it == 1) {
|
||||
out[i][j] = mapcurve[2.f * out[i][j]] / 2.f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -502,14 +545,16 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
|
||||
// if(shHighlights > 0 || shShadows > 0) {
|
||||
if(((mapmet == 2 && scale >2) || mapmet==3 || mapmet==4) && it==1) {
|
||||
if(((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) {
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
int j = 0;
|
||||
|
||||
for (; j < W_L; j++) {
|
||||
double mapval = 1.0 + shmap->map[i][j];
|
||||
double factor = 1.0;
|
||||
@ -519,11 +564,13 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
} else if (mapval < s_th) {
|
||||
factor = (s_th - (100.0 - shShadows) * (s_th - mapval) / 100.0) / mapval;
|
||||
}
|
||||
|
||||
out[i][j] *= factor;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// }
|
||||
|
||||
#ifdef _OPENMP
|
||||
@ -559,12 +606,13 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
}
|
||||
printf(".\n");
|
||||
|
||||
if(mapmet > 1) {
|
||||
if(shmap) {
|
||||
delete shmap;
|
||||
}
|
||||
}
|
||||
|
||||
shmap = NULL;
|
||||
|
||||
delete [] buffer;
|
||||
@ -613,12 +661,15 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
|
||||
luminance[i][j] *= (-1.f + 4.f * dehatransmissionCurve[absciss]); //new transmission
|
||||
if(viewmet==3 || viewmet==2) tran[i][j]=luminance[i][j];
|
||||
|
||||
if(viewmet == 3 || viewmet == 2) {
|
||||
tran[i][j] = luminance[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// median filter on transmission ==> reduce artifacts
|
||||
if (deh.medianmap && it==1) {//only one time
|
||||
if (deh.medianmap && it == 1) { //only one time
|
||||
int wid = W_L;
|
||||
int hei = H_L;
|
||||
float *tmL[hei] ALIGNED16;
|
||||
@ -686,10 +737,10 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
maxCD = -9999999.f;
|
||||
minCD = 9999999.f;
|
||||
// coeff for auto "transmission" with 2 sigma #95% datas
|
||||
float aza=16300.f/(2.f*stddv);
|
||||
float azb=-aza*(mean-2.f*stddv);
|
||||
float bza=16300.f/(2.f*stddv);
|
||||
float bzb=16300.f-bza*(mean);
|
||||
float aza = 16300.f / (2.f * stddv);
|
||||
float azb = -aza * (mean - 2.f * stddv);
|
||||
float bza = 16300.f / (2.f * stddv);
|
||||
float bzb = 16300.f - bza * (mean);
|
||||
|
||||
|
||||
|
||||
@ -718,7 +769,7 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
float str = strengthx;
|
||||
|
||||
if(lhutili && it==1) { // S=f(H)
|
||||
if(lhutili && it == 1) { // S=f(H)
|
||||
{
|
||||
float HH = exLuminance[i][j];
|
||||
float valparam;
|
||||
@ -733,15 +784,33 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
|
||||
if(exLuminance[i][j] > 65535.f*hig && higplus) str *= hig;
|
||||
if(viewmet==0) luminance[i][j]=clipretinex( cd, 0.f, 32768.f ) * str + (1.f - str) * originalLuminance[i][j];
|
||||
if(viewmet==1) luminance[i][j] = out[i][j];
|
||||
if(viewmet==4) luminance[i][j] = (1.f + str) * originalLuminance[i][j] - str* out[i][j];//unsharp
|
||||
if(viewmet==2) {
|
||||
if(tran[i][j]<= mean) luminance[i][j] = azb + aza*tran[i][j];//auto values
|
||||
else luminance[i][j] = bzb + bza*tran[i][j];
|
||||
if(exLuminance[i][j] > 65535.f * hig && higplus) {
|
||||
str *= hig;
|
||||
}
|
||||
|
||||
if(viewmet == 0) {
|
||||
luminance[i][j] = clipretinex( cd, 0.f, 32768.f ) * str + (1.f - str) * originalLuminance[i][j];
|
||||
}
|
||||
|
||||
if(viewmet == 1) {
|
||||
luminance[i][j] = out[i][j];
|
||||
}
|
||||
|
||||
if(viewmet == 4) {
|
||||
luminance[i][j] = (1.f + str) * originalLuminance[i][j] - str * out[i][j]; //unsharp
|
||||
}
|
||||
|
||||
if(viewmet == 2) {
|
||||
if(tran[i][j] <= mean) {
|
||||
luminance[i][j] = azb + aza * tran[i][j]; //auto values
|
||||
} else {
|
||||
luminance[i][j] = bzb + bza * tran[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
if(viewmet == 3) {
|
||||
luminance[i][j] = 1000.f + tran[i][j] * 700.f; //arbitrary values to help display log values which are between -20 to + 30 - usage values -4 + 5
|
||||
}
|
||||
if(viewmet==3) luminance[i][j] = 1000.f + tran[i][j]*700.f;//arbitrary values to help display log values which are between -20 to + 30 - usage values -4 + 5
|
||||
|
||||
}
|
||||
|
||||
@ -763,11 +832,12 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
Tmax = maxtr;
|
||||
|
||||
|
||||
if (shcurve && it==1) {
|
||||
if (shcurve && it == 1) {
|
||||
delete shcurve;
|
||||
}
|
||||
}
|
||||
if(viewmet==3 || viewmet==2) {
|
||||
|
||||
if(viewmet == 3 || viewmet == 2) {
|
||||
delete [] tranBuffer;
|
||||
tranBuffer = NULL;
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -88,7 +88,7 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (map, map, W, H, radius);
|
||||
gaussianBlur (map, map, W, H, radius);
|
||||
}
|
||||
}
|
||||
|
||||
@ -233,7 +233,7 @@ void SHMap::updateL (float** L, double radius, bool hq, int skip)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
gaussianBlur<float> (map, map, W, H, radius);
|
||||
gaussianBlur (map, map, W, H, radius);
|
||||
}
|
||||
}
|
||||
|
||||
@ -244,7 +244,7 @@ void SHMap::updateL (float** L, double radius, bool hq, int skip)
|
||||
//experimental dirpyr shmap
|
||||
float thresh = (100.f * radius); //1000;
|
||||
int levrad = 16;
|
||||
levrad=2;//for retinex - otherwise levrad = 16
|
||||
levrad = 2; //for retinex - otherwise levrad = 16
|
||||
// set up range function
|
||||
// calculate size of Lookup table. That's possible because from a value k for all i>=k rangefn[i] will be exp(-10)
|
||||
// So we use this fact and the automatic clip of lut to reduce the size of lut and the number of calculations to fill the lut
|
||||
@ -253,6 +253,7 @@ void SHMap::updateL (float** L, double radius, bool hq, int skip)
|
||||
const int lutSize = (int) thresh * sqrtf(10.f) + 1;
|
||||
thresh *= thresh;
|
||||
LUTf rangefn(lutSize);
|
||||
|
||||
for (int i = 0; i < lutSize - 1; i++) {
|
||||
rangefn[i] = xexpf(-min(10.f, (static_cast<float>(i) * i) / thresh )); //*intfactor;
|
||||
}
|
||||
@ -275,6 +276,7 @@ void SHMap::updateL (float** L, double radius, bool hq, int skip)
|
||||
scale *= 2;
|
||||
numLevels++;
|
||||
}
|
||||
|
||||
//printf("numlev=%d\n",numLevels);
|
||||
float ** dirpyrlo[2];
|
||||
|
||||
|
@ -901,11 +901,7 @@ static INLINE vdouble xlog1p(vdouble a) {
|
||||
// Aggregate holding two vfloat vectors, x and y.
typedef struct {
|
||||
vfloat x, y;
|
||||
} vfloat2;
|
||||
// vmlaf: multiply-add, returns x * y + z.
// When both __FMA__ and __x86_64__ are defined the _mm_fmadd_ps intrinsic is used,
// otherwise the result is composed from vmulf followed by vaddf.
// NOTE(review): this span sits in a diff hunk that removes lines; which of these
// lines survive the commit is not visible from here - confirm against the real header.
#if defined( __FMA__ ) && defined( __x86_64__ )
|
||||
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z) { return _mm_fmadd_ps(x,y,z); }
|
||||
#else
|
||||
static INLINE vfloat vmlaf(vfloat x, vfloat y, vfloat z) { return vaddf(vmulf(x, y), z); }
|
||||
#endif
|
||||
|
||||
// Absolute value per lane: combines f with a -0.0f constant (only the sign bit set) through vandnotm.
// Presumably vandnotm(m, v) computes (~m) & v, which clears the sign bit of f - TODO confirm.
static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
|
||||
// Negation per lane: combines f with a -0.0f constant (only the sign bit set) through vxorm.
// Presumably vxorm is a bitwise XOR, which flips the sign bit of f - TODO confirm.
static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); }
|
||||
|
||||
@ -921,6 +917,18 @@ static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vca
|
||||
}
|
||||
#endif
|
||||
|
||||
// Lane-wise "keep or zero": bitwise-ANDs the mask with x via _mm_and_ps.
// mask: per-lane selection mask (presumably all-ones or all-zeros per lane, as a comparison would produce - TODO confirm).
// x:    values to keep where the mask is set.
// Returns x in lanes whose mask bits are set and 0 in the remaining lanes.
static INLINE vfloat vselfzero(vmask mask, vfloat x) {
|
||||
// returns value of x if corresponding mask bits are 1, else returns 0
|
||||
// faster than vself(mask, x, ZEROV)
|
||||
return _mm_and_ps((vfloat)mask, x);
|
||||
}
|
||||
// Lane-wise "zero or keep", the complement of a masked keep: uses _mm_andnot_ps,
// which ANDs the bitwise inverse of its first operand (the mask) with x.
// mask: per-lane selection mask (presumably all-ones or all-zeros per lane - TODO confirm).
// x:    values to keep where the mask is NOT set.
// Returns 0 in lanes whose mask bits are set and x in the remaining lanes.
static INLINE vfloat vselfnotzero(vmask mask, vfloat x) {
|
||||
// returns value of x if corresponding mask bits are 0, else returns 0
|
||||
// faster than vself(mask, ZEROV, x)
|
||||
return _mm_andnot_ps((vfloat)mask, x);
|
||||
}
|
||||
|
||||
|
||||
static INLINE vint2 vseli2_lt(vfloat f0, vfloat f1, vint2 x, vint2 y) {
|
||||
vint2 m2 = vcast_vi2_vm(vmaskf_lt(f0, f1));
|
||||
return vori2(vandi2(m2, x), vandnoti2(m2, y));
|
||||
@ -1171,7 +1179,7 @@ static INLINE vfloat xatan2f(vfloat y, vfloat x) {
|
||||
r = vmulsignf(r, x);
|
||||
r = vself(vorm(vmaskf_isinf(x), vmaskf_eq(x, vcast_vf_f(0.0f))), vsubf(vcast_vf_f((float)(M_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(M_PI/2)), x))), r);
|
||||
r = vself(vmaskf_isinf(y), vsubf(vcast_vf_f((float)(M_PI/2)), visinf2f(x, vmulsignf(vcast_vf_f((float)(M_PI/4)), x))), r);
|
||||
r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vself(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)M_PI), vcast_vf_f(0.0f)), r);
|
||||
r = vself(vmaskf_eq(y, vcast_vf_f(0.0f)), vselfzero(vmaskf_eq(vsignf(x), vcast_vf_f(-1.0f)), vcast_vf_f((float)M_PI)), r);
|
||||
|
||||
return vself(vorm(vmaskf_isnan(x), vmaskf_isnan(y)), vcast_vf_f(NANf), vmulsignf(r, y));
|
||||
}
|
||||
@ -1304,7 +1312,7 @@ static INLINE vfloat xcbrtf(vfloat d) {
|
||||
}
|
||||
|
||||
// Lane-wise clamp of a: evaluates max(b, min(a, c)), so b acts as the lower bound and c as the upper bound.
// NOTE(review): the two return statements below look like the before/after revisions of the same
// line in this diff view (raw _mm_max_ps/_mm_min_ps vs. the vmaxf/vminf wrappers); the real header
// presumably contains only one of them - confirm.
static INLINE vfloat LIMV( vfloat a, vfloat b, vfloat c ) {
|
||||
return _mm_max_ps( b, _mm_min_ps(a,c));
|
||||
return vmaxf( b, vminf(a,c));
|
||||
}
|
||||
|
||||
static INLINE vfloat ULIMV( vfloat a, vfloat b, vfloat c ){
|
||||
@ -1312,13 +1320,13 @@ static INLINE vfloat ULIMV( vfloat a, vfloat b, vfloat c ){
|
||||
}
|
||||
|
||||
// Lane-wise square: returns a multiplied by itself.
// NOTE(review): the two return statements below look like the before/after revisions of the same
// line in this diff view (_mm_mul_ps intrinsic vs. operator*); the real header presumably
// contains only one of them - confirm.
static INLINE vfloat SQRV(vfloat a){
|
||||
return _mm_mul_ps( a,a );
|
||||
return a * a;
|
||||
}
|
||||
|
||||
// Conditional per-lane exchange of a and b (both passed by reference and modified in place).
// Assuming vself(m, x, y) yields x where m is set and y elsewhere, lanes where condition is set
// end up swapped, while the other lanes keep their original a and b - TODO confirm against vself.
// NOTE(review): the first three statements and the following three appear to be the before/after
// revisions of the same lines in this diff view (only the trailing comments differ); the real
// header presumably contains a single copy - confirm.
static inline void vswap( vmask condition, vfloat &a, vfloat &b) {
|
||||
vfloat temp = vself(condition, a, b); // the larger of the two
|
||||
condition = vnotm(condition); // invert the mask
|
||||
a = vself(condition, a, b); // the smaller of the two
|
||||
vfloat temp = vself(condition, a, b); // the values which fit to condition
|
||||
condition = vnotm(condition); // invert the condition
|
||||
a = vself(condition, a, b); // the values which fit to inverted condition
|
||||
b = temp;
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ Sharpening::Sharpening () : FoldableToolPanel(this, "sharpening", M("TP_SHARPENI
|
||||
pack_start (*hb);
|
||||
|
||||
rld = new Gtk::VBox ();
|
||||
dradius = Gtk::manage (new Adjuster (M("TP_SHARPENING_EDRADIUS"), 0.5, 2.5, 0.01, 0.75));
|
||||
dradius = Gtk::manage (new Adjuster (M("TP_SHARPENING_EDRADIUS"), 0.4, 2.5, 0.01, 0.75));
|
||||
damount = Gtk::manage (new Adjuster (M("TP_SHARPENING_RLD_AMOUNT"), 0.0, 100, 1, 75));
|
||||
ddamping = Gtk::manage (new Adjuster (M("TP_SHARPENING_RLD_DAMPING"), 0, 100, 1, 20));
|
||||
diter = Gtk::manage (new Adjuster (M("TP_SHARPENING_RLD_ITERATIONS"), 5, 100, 1, 30));
|
||||
|
Loading…
x
Reference in New Issue
Block a user