reviewed boxblur code and usage
This commit is contained in:
parent
6026c110fa
commit
6bebc19f02
@ -31,6 +31,7 @@ set(CAMCONSTSFILE "camconst.json")
|
||||
|
||||
set(RTENGINESOURCEFILES
|
||||
badpixels.cc
|
||||
boxblur.cc
|
||||
CA_correct_RT.cc
|
||||
capturesharpening.cc
|
||||
EdgePreservingDecomposition.cc
|
||||
|
@ -479,6 +479,7 @@ enum nrquality {QUALITY_STANDARD, QUALITY_HIGH};
|
||||
void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi)
|
||||
{
|
||||
BENCHFUN
|
||||
|
||||
//#ifdef _DEBUG
|
||||
MyTime t1e, t2e;
|
||||
t1e.set();
|
||||
@ -687,8 +688,8 @@ BENCHFUN
|
||||
}
|
||||
}
|
||||
|
||||
int tilesize;
|
||||
int overlap;
|
||||
int tilesize = 0;
|
||||
int overlap = 0;
|
||||
|
||||
if (settings->leveldnti == 0) {
|
||||
tilesize = 1024;
|
||||
@ -1341,8 +1342,6 @@ BENCHFUN
|
||||
|
||||
#ifdef _OPENMP
|
||||
int masterThread = omp_get_thread_num();
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@ -1351,11 +1350,9 @@ BENCHFUN
|
||||
#else
|
||||
int subThread = 0;
|
||||
#endif
|
||||
float blurbuffer[TS * TS] ALIGNED64;
|
||||
float *Lblox = LbloxArray[subThread];
|
||||
float *fLblox = fLbloxArray[subThread];
|
||||
float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16;
|
||||
float nbrwt[TS * TS] ALIGNED64;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
@ -1430,7 +1427,7 @@ BENCHFUN
|
||||
|
||||
|
||||
for (int hblk = 0; hblk < numblox_W; ++hblk) {
|
||||
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail, nbrwt, blurbuffer);
|
||||
RGBtile_denoise(fLblox, hblk, noisevar_Ldetail);
|
||||
}//end of horizontal block loop
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@ -1447,14 +1444,8 @@ BENCHFUN
|
||||
//add row of blocks to output image tile
|
||||
RGBoutput_tile_row(Lblox, Ldetail, tilemask_out, height, width, topproc);
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
}//end of vertical block loop
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
}
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||
@ -2041,26 +2032,20 @@ BENCHFUN
|
||||
|
||||
}//end of main RGB_denoise
|
||||
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
||||
void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
|
||||
void ImProcFunctions::RGBtile_denoise(float* fLblox, int hblproc, float noisevar_Ldetail) //for DCT
|
||||
{
|
||||
int blkstart = hblproc * TS * TS;
|
||||
float nbrwt[TS * TS] ALIGNED64;
|
||||
const int blkstart = hblproc * TS * TS;
|
||||
|
||||
boxabsblur(fLblox + blkstart, nbrwt, 3, 3, TS, TS, blurbuffer); //blur neighbor weights for more robust estimation //for DCT
|
||||
boxabsblur(fLblox + blkstart, nbrwt, 3, TS, TS, false); //blur neighbor weights for more robust estimation //for DCT
|
||||
|
||||
#ifdef __SSE2__
|
||||
__m128 tempv;
|
||||
__m128 noisevar_Ldetailv = _mm_set1_ps(noisevar_Ldetail);
|
||||
__m128 onev = _mm_set1_ps(1.0f);
|
||||
const vfloat noisevar_Ldetailv = F2V(-1.f / noisevar_Ldetail);
|
||||
const vfloat onev = F2V(1.f);
|
||||
|
||||
for (int n = 0; n < TS * TS; n += 4) { //for DCT
|
||||
tempv = onev - xexpf(-SQRV(LVF(nbrwt[n])) / noisevar_Ldetailv);
|
||||
_mm_storeu_ps(&fLblox[blkstart + n], LVFU(fLblox[blkstart + n]) * tempv);
|
||||
const vfloat tempv = onev - xexpf(SQRV(LVF(nbrwt[n])) * noisevar_Ldetailv);
|
||||
STVF(fLblox[blkstart + n], LVF(fLblox[blkstart + n]) * tempv);
|
||||
}//output neighbor averaged result
|
||||
|
||||
#else
|
||||
@ -2071,14 +2056,7 @@ void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noiseva
|
||||
|
||||
#endif
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//printf("vblk=%d hlk=%d wsqave=%f || ",vblproc,hblproc,wsqave);
|
||||
|
||||
}//end of function tile_denoise
|
||||
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
}
|
||||
|
||||
void ImProcFunctions::RGBoutput_tile_row(float *bloxrow_L, float ** Ldetail, float ** tilemask_out, int height, int width, int top)
|
||||
{
|
||||
@ -2207,7 +2185,7 @@ void ImProcFunctions::Noise_residualAB(const wavelet_decomposition &WaveletCoeff
|
||||
chmaxresid = maxresid;
|
||||
}
|
||||
|
||||
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
|
||||
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
|
||||
{
|
||||
int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5);
|
||||
const float eps = 0.01f;
|
||||
@ -2258,23 +2236,22 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
|
||||
//simple wavelet shrinkage
|
||||
float * sfave = buffer[0] + 32;
|
||||
float * sfaved = buffer[2] + 96;
|
||||
float * blurBuffer = buffer[1] + 64;
|
||||
|
||||
float mad_Lr = madL[lvl][dir - 1];
|
||||
|
||||
float levelFactor = mad_Lr * 5.f / (lvl + 1);
|
||||
#ifdef __SSE2__
|
||||
__m128 mad_Lv;
|
||||
__m128 ninev = _mm_set1_ps(9.0f);
|
||||
__m128 epsv = _mm_set1_ps(eps);
|
||||
__m128 mag_Lv;
|
||||
__m128 levelFactorv = _mm_set1_ps(levelFactor);
|
||||
vfloat mad_Lv;
|
||||
vfloat ninev = F2V(9.0f);
|
||||
vfloat epsv = F2V(eps);
|
||||
vfloat mag_Lv;
|
||||
vfloat levelFactorv = F2V(levelFactor);
|
||||
int coeffloc_L;
|
||||
|
||||
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
|
||||
mad_Lv = LVFU(noisevarlum[coeffloc_L]) * levelFactorv;
|
||||
mag_Lv = SQRV(LVFU(WavCoeffs_L[dir][coeffloc_L]));
|
||||
_mm_storeu_ps(&sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
|
||||
STVFU(sfave[coeffloc_L], mag_Lv / (mag_Lv + mad_Lv * xexpf(-mag_Lv / (mad_Lv * ninev)) + epsv));
|
||||
}
|
||||
|
||||
for (; coeffloc_L < Hlvl_L * Wlvl_L; ++coeffloc_L) {
|
||||
@ -2294,15 +2271,15 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
|
||||
}
|
||||
|
||||
#endif
|
||||
boxblur(sfave, sfaved, blurBuffer, lvl + 2, lvl + 2, Wlvl_L, Hlvl_L); //increase smoothness by locally averaging shrinkage
|
||||
boxblur(sfave, sfaved, lvl + 2, Wlvl_L, Hlvl_L, false); //increase smoothness by locally averaging shrinkage
|
||||
#ifdef __SSE2__
|
||||
__m128 sfavev;
|
||||
__m128 sf_Lv;
|
||||
vfloat sfavev;
|
||||
vfloat sf_Lv;
|
||||
|
||||
for (coeffloc_L = 0; coeffloc_L < Hlvl_L * Wlvl_L - 3; coeffloc_L += 4) {
|
||||
sfavev = LVFU(sfaved[coeffloc_L]);
|
||||
sf_Lv = LVFU(sfave[coeffloc_L]);
|
||||
_mm_storeu_ps(&WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
|
||||
STVFU(WavCoeffs_L[dir][coeffloc_L], LVFU(WavCoeffs_L[dir][coeffloc_L]) * (SQRV(sfavev) + SQRV(sf_Lv)) / (sfavev + sf_Lv + epsv));
|
||||
//use smoothed shrinkage unless local shrinkage is much less
|
||||
}
|
||||
|
||||
@ -2340,7 +2317,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &Wavelet
|
||||
return (!memoryAllocationFailed);
|
||||
}
|
||||
|
||||
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
|
||||
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab,
|
||||
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)
|
||||
{
|
||||
int maxlvl = WaveletCoeffs_L.maxlevel();
|
||||
@ -2422,12 +2399,12 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
|
||||
if (noisevar_ab > 0.001f) {
|
||||
|
||||
#ifdef __SSE2__
|
||||
__m128 onev = _mm_set1_ps(1.f);
|
||||
__m128 mad_abrv = _mm_set1_ps(mad_abr);
|
||||
__m128 rmad_Lm9v = onev / _mm_set1_ps(mad_Lr * 9.f);
|
||||
__m128 mad_abv;
|
||||
__m128 mag_Lv, mag_abv;
|
||||
__m128 tempabv;
|
||||
vfloat onev = F2V(1.f);
|
||||
vfloat mad_abrv = F2V(mad_abr);
|
||||
vfloat rmad_Lm9v = onev / F2V(mad_Lr * 9.f);
|
||||
vfloat mad_abv;
|
||||
vfloat mag_Lv, mag_abv;
|
||||
vfloat tempabv;
|
||||
int coeffloc_ab;
|
||||
|
||||
for (coeffloc_ab = 0; coeffloc_ab < Hlvl_ab * Wlvl_ab - 3; coeffloc_ab += 4) {
|
||||
@ -2437,7 +2414,7 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
|
||||
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
|
||||
mag_abv = SQRV(tempabv);
|
||||
mag_Lv = SQRV(mag_Lv) * rmad_Lm9v;
|
||||
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
|
||||
STVFU(WavCoeffs_ab[dir][coeffloc_ab], tempabv * SQRV((onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv)))));
|
||||
}
|
||||
|
||||
// few remaining pixels
|
||||
@ -2470,17 +2447,15 @@ bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &Wavele
|
||||
}
|
||||
|
||||
for (int i = 2; i >= 0; i--) {
|
||||
if (buffer[i] != nullptr) {
|
||||
delete[] buffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
return (!memoryAllocationFailed);
|
||||
}
|
||||
|
||||
|
||||
bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD
|
||||
bool ImProcFunctions::WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge)//mod JD
|
||||
|
||||
{
|
||||
|
||||
@ -2530,16 +2505,14 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L,
|
||||
}
|
||||
|
||||
for (int i = 3; i >= 0; i--) {
|
||||
if (buffer[i] != nullptr) {
|
||||
delete[] buffer[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
return (!memoryAllocationFailed);
|
||||
}
|
||||
|
||||
|
||||
bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
|
||||
bool ImProcFunctions::WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab,
|
||||
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)//mod JD
|
||||
|
||||
{
|
||||
@ -2596,7 +2569,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
||||
void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
|
||||
void ImProcFunctions::ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
|
||||
float *noisevarlum, float * madL, float * vari, int edge)
|
||||
|
||||
{
|
||||
@ -2607,12 +2580,12 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
|
||||
float * sfaved = buffer[1] + 64;
|
||||
float * blurBuffer = buffer[2] + 96;
|
||||
|
||||
int W_L = WaveletCoeffs_L.level_W(level);
|
||||
int H_L = WaveletCoeffs_L.level_H(level);
|
||||
const int W_L = WaveletCoeffs_L.level_W(level);
|
||||
const int H_L = WaveletCoeffs_L.level_H(level);
|
||||
|
||||
float ** WavCoeffs_L = WaveletCoeffs_L.level_coeffs(level);
|
||||
// printf("OK lev=%d\n",level);
|
||||
float mad_L = madL[dir - 1] ;
|
||||
const float mad_L = madL[dir - 1] ;
|
||||
const float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
|
||||
|
||||
if (edge == 1 && vari) {
|
||||
noisevarlum = blurBuffer; // we need one buffer, but fortunately we don't have to allocate a new one because we can use blurBuffer
|
||||
@ -2622,71 +2595,45 @@ void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float *
|
||||
}
|
||||
}
|
||||
|
||||
float levelFactor = mad_L * 5.f / static_cast<float>(level + 1);
|
||||
int i = 0;
|
||||
#ifdef __SSE2__
|
||||
__m128 magv;
|
||||
__m128 levelFactorv = _mm_set1_ps(levelFactor);
|
||||
__m128 mad_Lv;
|
||||
__m128 ninev = _mm_set1_ps(9.0f);
|
||||
__m128 epsv = _mm_set1_ps(eps);
|
||||
int i;
|
||||
const vfloat levelFactorv = F2V(levelFactor);
|
||||
const vfloat ninev = F2V(9.f);
|
||||
const vfloat epsv = F2V(eps);
|
||||
|
||||
for (i = 0; i < W_L * H_L - 3; i += 4) {
|
||||
mad_Lv = LVFU(noisevarlum[i]) * levelFactorv;
|
||||
magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
|
||||
_mm_storeu_ps(&sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
|
||||
for (; i < W_L * H_L - 3; i += 4) {
|
||||
const vfloat mad_Lv = LVFU(noisevarlum[i]) * levelFactorv;
|
||||
const vfloat magv = SQRV(LVFU(WavCoeffs_L[dir][i]));
|
||||
STVFU(sfave[i], magv / (magv + mad_Lv * xexpf(-magv / (ninev * mad_Lv)) + epsv));
|
||||
}
|
||||
|
||||
#endif
|
||||
// few remaining pixels
|
||||
for (; i < W_L * H_L; ++i) {
|
||||
float mag = SQR(WavCoeffs_L[dir][i]);
|
||||
const float mag = SQR(WavCoeffs_L[dir][i]);
|
||||
sfave[i] = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (int i = 0; i < W_L * H_L; ++i) {
|
||||
|
||||
float mag = SQR(WavCoeffs_L[dir][i]);
|
||||
float shrinkfactor = mag / (mag + levelFactor * noisevarlum[i] * xexpf(-mag / (9 * levelFactor * noisevarlum[i])) + eps);
|
||||
sfave[i] = shrinkfactor;
|
||||
}
|
||||
|
||||
#endif
|
||||
boxblur(sfave, sfaved, blurBuffer, level + 2, level + 2, W_L, H_L); //increase smoothness by locally averaging shrinkage
|
||||
boxblur(sfave, sfaved, level + 2, W_L, H_L, false); //increase smoothness by locally averaging shrinkage
|
||||
|
||||
i = 0;
|
||||
#ifdef __SSE2__
|
||||
__m128 sfv;
|
||||
|
||||
for (i = 0; i < W_L * H_L - 3; i += 4) {
|
||||
sfv = LVFU(sfave[i]);
|
||||
for (; i < W_L * H_L - 3; i += 4) {
|
||||
const vfloat sfv = LVFU(sfave[i]);
|
||||
//use smoothed shrinkage unless local shrinkage is much less
|
||||
_mm_storeu_ps(&WavCoeffs_L[dir][i], _mm_loadu_ps(&WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv));
|
||||
STVFU(WavCoeffs_L[dir][i], LVFU(WavCoeffs_L[dir][i]) * (SQRV(LVFU(sfaved[i])) + SQRV(sfv)) / (LVFU(sfaved[i]) + sfv + epsv));
|
||||
}
|
||||
|
||||
#endif
|
||||
// few remaining pixels
|
||||
for (; i < W_L * H_L; ++i) {
|
||||
float sf = sfave[i];
|
||||
|
||||
const float sf = sfave[i];
|
||||
//use smoothed shrinkage unless local shrinkage is much less
|
||||
WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps);
|
||||
}//now luminance coefficients are denoised
|
||||
|
||||
#else
|
||||
|
||||
for (int i = 0; i < W_L * H_L; ++i) {
|
||||
float sf = sfave[i];
|
||||
|
||||
//use smoothed shrinkage unless local shrinkage is much less
|
||||
WavCoeffs_L[dir][i] *= (SQR(sfaved[i]) + SQR(sf)) / (sfaved[i] + sf + eps);
|
||||
|
||||
}//now luminance coefficients are denoised
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
||||
void ImProcFunctions::ShrinkAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
||||
float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch,
|
||||
bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated)
|
||||
|
||||
@ -2700,7 +2647,6 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
|
||||
|
||||
float * sfaveab = buffer[0] + 32;
|
||||
float * sfaveabd = buffer[1] + 64;
|
||||
float * blurBuffer = buffer[2] + 96;
|
||||
|
||||
int W_ab = WaveletCoeffs_ab.level_W(level);
|
||||
int H_ab = WaveletCoeffs_ab.level_H(level);
|
||||
@ -2724,12 +2670,12 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
|
||||
if (noisevar_ab > 0.001f) {
|
||||
madab = useNoiseCCurve ? madab : madab * noisevar_ab;
|
||||
#ifdef __SSE2__
|
||||
__m128 onev = _mm_set1_ps(1.f);
|
||||
__m128 mad_abrv = _mm_set1_ps(madab);
|
||||
vfloat onev = F2V(1.f);
|
||||
vfloat mad_abrv = F2V(madab);
|
||||
|
||||
__m128 rmadLm9v = onev / _mm_set1_ps(mad_L * 9.f);
|
||||
__m128 mad_abv ;
|
||||
__m128 mag_Lv, mag_abv;
|
||||
vfloat rmadLm9v = onev / F2V(mad_L * 9.f);
|
||||
vfloat mad_abv ;
|
||||
vfloat mag_Lv, mag_abv;
|
||||
int coeffloc_ab;
|
||||
|
||||
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
|
||||
@ -2738,7 +2684,7 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
|
||||
mag_Lv = LVFU(WavCoeffs_L[dir][coeffloc_ab]);
|
||||
mag_abv = SQRV(LVFU(WavCoeffs_ab[dir][coeffloc_ab]));
|
||||
mag_Lv = (SQRV(mag_Lv)) * rmadLm9v;
|
||||
_mm_storeu_ps(&sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
|
||||
STVFU(sfaveab[coeffloc_ab], (onev - xexpf(-(mag_abv / mad_abv) - (mag_Lv))));
|
||||
}
|
||||
|
||||
// few remaining pixels
|
||||
@ -2761,18 +2707,18 @@ void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavele
|
||||
|
||||
#endif
|
||||
|
||||
boxblur(sfaveab, sfaveabd, blurBuffer, level + 2, level + 2, W_ab, H_ab); //increase smoothness by locally averaging shrinkage
|
||||
boxblur(sfaveab, sfaveabd, level + 2, W_ab, H_ab, false); //increase smoothness by locally averaging shrinkage
|
||||
#ifdef __SSE2__
|
||||
__m128 epsv = _mm_set1_ps(eps);
|
||||
__m128 sfabv;
|
||||
__m128 sfaveabv;
|
||||
vfloat epsv = F2V(eps);
|
||||
vfloat sfabv;
|
||||
vfloat sfaveabv;
|
||||
|
||||
for (coeffloc_ab = 0; coeffloc_ab < H_ab * W_ab - 3; coeffloc_ab += 4) {
|
||||
sfabv = LVFU(sfaveab[coeffloc_ab]);
|
||||
sfaveabv = LVFU(sfaveabd[coeffloc_ab]);
|
||||
|
||||
//use smoothed shrinkage unless local shrinkage is much less
|
||||
_mm_storeu_ps(&WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
|
||||
STVFU(WavCoeffs_ab[dir][coeffloc_ab], LVFU(WavCoeffs_ab[dir][coeffloc_ab]) * (SQRV(sfaveabv) + SQRV(sfabv)) / (sfaveabv + sfabv + epsv));
|
||||
}
|
||||
|
||||
// few remaining pixels
|
||||
@ -2919,8 +2865,8 @@ void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
|
||||
}
|
||||
|
||||
|
||||
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a,
|
||||
wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice,
|
||||
void ImProcFunctions::WaveletDenoiseAll_info(int levwav, const wavelet_decomposition &WaveletCoeffs_a,
|
||||
const wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice,
|
||||
float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb)
|
||||
{
|
||||
|
||||
@ -3106,7 +3052,7 @@ void ImProcFunctions::calcautodn_info(float &chaut, float &delta, int Nb, int le
|
||||
|
||||
}
|
||||
|
||||
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
|
||||
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
|
||||
{
|
||||
if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) {
|
||||
//nothing to do
|
||||
@ -3173,8 +3119,8 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
|
||||
|
||||
const float gain = pow(2.0f, float(expcomp));
|
||||
|
||||
int tilesize;
|
||||
int overlap;
|
||||
int tilesize = 0;
|
||||
int overlap = 0;
|
||||
|
||||
if (settings->leveldnti == 0) {
|
||||
tilesize = 1024;
|
||||
@ -3275,16 +3221,16 @@ void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc,
|
||||
for (int i = tiletop; i < tilebottom; i += 2) {
|
||||
int i1 = i - tiletop;
|
||||
#ifdef __SSE2__
|
||||
__m128 aNv, bNv;
|
||||
__m128 c100v = _mm_set1_ps(100.f);
|
||||
vfloat aNv, bNv;
|
||||
vfloat c100v = F2V(100.f);
|
||||
int j;
|
||||
|
||||
for (j = tileleft; j < tileright - 7; j += 8) {
|
||||
int j1 = j - tileleft;
|
||||
aNv = LVFU(acalc[i >> 1][j >> 1]);
|
||||
bNv = LVFU(bcalc[i >> 1][j >> 1]);
|
||||
_mm_storeu_ps(&noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
|
||||
_mm_storeu_ps(&noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
|
||||
STVFU(noisevarhue[i1 >> 1][j1 >> 1], xatan2f(bNv, aNv));
|
||||
STVFU(noisevarchrom[i1 >> 1][j1 >> 1], vmaxf(vsqrtf(SQRV(aNv) + SQRV(bNv)),c100v));
|
||||
}
|
||||
|
||||
for (; j < tileright; j += 2) {
|
||||
|
420
rtengine/boxblur.cc
Normal file
420
rtengine/boxblur.cc
Normal file
@ -0,0 +1,420 @@
|
||||
/*
|
||||
* This file is part of RawTherapee.
|
||||
*
|
||||
* Copyright (C) 2010 Emil Martinec <ejmartin@uchicago.edu>
|
||||
* Copyright (C) 2019 Ingo Weyrich <heckflosse67@gmx.de>
|
||||
*
|
||||
* RawTherapee is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* RawTherapee is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with RawTherapee. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <memory>
|
||||
#include <cmath>
|
||||
|
||||
#include "boxblur.h"
|
||||
|
||||
#include "rt_math.h"
|
||||
#include "opthelper.h"
|
||||
|
||||
namespace rtengine
|
||||
{
|
||||
|
||||
void boxblur(float** src, float** dst, int radius, int W, int H, bool multiThread)
|
||||
{
|
||||
//box blur using rowbuffers and linebuffers instead of a full size buffer
|
||||
|
||||
if (radius == 0) {
|
||||
if (src != dst) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for if (multiThread)
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; ++row) {
|
||||
for (int col = 0; col < W; ++col) {
|
||||
dst[row][col] = src[row][col];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
{
|
||||
std::unique_ptr<float[]> buffer(new float[numCols * (radius + 1)]);
|
||||
|
||||
//horizontal blur
|
||||
float* const lineBuffer = buffer.get();
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int row = 0; row < H; ++row) {
|
||||
float len = radius + 1;
|
||||
float tempval = src[row][0];
|
||||
lineBuffer[0] = tempval;
|
||||
for (int j = 1; j <= radius; j++) {
|
||||
tempval += src[row][j];
|
||||
}
|
||||
|
||||
tempval /= len;
|
||||
dst[row][0] = tempval;
|
||||
|
||||
for (int col = 1; col <= radius; ++col) {
|
||||
lineBuffer[col] = src[row][col];
|
||||
tempval = (tempval * len + src[row][col + radius]) / (len + 1);
|
||||
dst[row][col] = tempval;
|
||||
++len;
|
||||
}
|
||||
int pos = 0;
|
||||
for (int col = radius + 1; col < W - radius; ++col) {
|
||||
const float oldVal = lineBuffer[pos];
|
||||
lineBuffer[pos] = src[row][col];
|
||||
tempval = tempval + (src[row][col + radius] - oldVal) / len;
|
||||
dst[row][col] = tempval;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int col = W - radius; col < W; ++col) {
|
||||
tempval = (tempval * len - lineBuffer[pos]) / (len - 1);
|
||||
dst[row][col] = tempval;
|
||||
--len;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
//vertical blur
|
||||
#ifdef __SSE2__
|
||||
vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get();
|
||||
const vfloat leninitv = F2V(radius + 1);
|
||||
const vfloat onev = F2V(1.f);
|
||||
vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - 7; col += 8) {
|
||||
lenv = leninitv;
|
||||
tempv = LVFU(dst[0][col]);
|
||||
temp1v = LVFU(dst[0][col + 4]);
|
||||
rowBuffer[0][0] = tempv;
|
||||
rowBuffer[0][1] = temp1v;
|
||||
|
||||
for (int i = 1; i <= radius; ++i) {
|
||||
tempv = tempv + LVFU(dst[i][col]);
|
||||
temp1v = temp1v + LVFU(dst[i][col + 4]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
temp1v = temp1v / lenv;
|
||||
STVFU(dst[0][col], tempv);
|
||||
STVFU(dst[0][col + 4], temp1v);
|
||||
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
rowBuffer[row][0] = LVFU(dst[row][col]);
|
||||
rowBuffer[row][1] = LVFU(dst[row][col + 4]);
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v;
|
||||
temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; ++row) {
|
||||
vfloat oldVal0 = rowBuffer[pos][0];
|
||||
vfloat oldVal1 = rowBuffer[pos][1];
|
||||
rowBuffer[pos][0] = LVFU(dst[row][col]);
|
||||
rowBuffer[pos][1] = LVFU(dst[row][col + 4]);
|
||||
tempv = tempv + (LVFU(dst[row + radius][col]) - oldVal0) * rlenv ;
|
||||
temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - oldVal1) * rlenv ;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int row = H - radius; row < H; ++row) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - rowBuffer[pos][0]) / lenm1v;
|
||||
temp1v = (temp1v * lenv - rowBuffer[pos][1]) / lenm1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenm1v;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
float (* const rowBuffer)[8] = (float(*)[8]) buffer.get();
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - numCols + 1; col += 8) {
|
||||
float len = radius + 1;
|
||||
|
||||
for (int k = 0; k < numCols; ++k) {
|
||||
rowBuffer[0][k] = dst[0][col + k];
|
||||
}
|
||||
|
||||
for (int i = 1; i <= radius; ++i) {
|
||||
for (int k = 0; k < numCols; ++k) {
|
||||
dst[0][col + k] += dst[i][col + k];
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
rowBuffer[row][k] = dst[row][col + k];
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
|
||||
}
|
||||
|
||||
len ++;
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
float oldVal = rowBuffer[pos][k];
|
||||
rowBuffer[pos][k] = dst[row][col + k];
|
||||
dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) / len;
|
||||
}
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int row = H - radius; row < H; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1);
|
||||
}
|
||||
len --;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
//vertical blur, remaining columns
|
||||
#ifdef _OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
{
|
||||
const int remaining = W % numCols;
|
||||
|
||||
if (remaining > 0) {
|
||||
float (* const rowBuffer)[8] = (float(*)[8]) buffer.get();
|
||||
const int col = W - remaining;
|
||||
|
||||
float len = radius + 1;
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
rowBuffer[0][k] = dst[0][col + k];
|
||||
}
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[0][col + k] += dst[row][col + k];
|
||||
}
|
||||
}
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
rowBuffer[row][k] = dst[row][col + k];
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
|
||||
}
|
||||
len ++;
|
||||
}
|
||||
const float rlen = 1.f / len;
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
float oldVal = rowBuffer[pos][k];
|
||||
rowBuffer[pos][k] = dst[row][col + k];
|
||||
dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen;
|
||||
}
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
for (int row = H - radius; row < H; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[pos][k]) / (len - 1);
|
||||
}
|
||||
len --;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void boxabsblur(float** src, float** dst, int radius, int W, int H, bool multiThread)
|
||||
{
|
||||
//abs box blur using rowbuffers and linebuffers instead of a full size buffer, W should be a multiple of 16
|
||||
|
||||
if (radius == 0) {
|
||||
if (src != dst) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for if (multiThread)
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; ++row) {
|
||||
for (int col = 0; col < W; ++col) {
|
||||
dst[row][col] = std::fabs(src[row][col]);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr int numCols = 16; // process numCols columns at once for better usage of L1 cpu cache
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
{
|
||||
float buffer[numCols * (radius + 1)] ALIGNED64;
|
||||
|
||||
//horizontal blur
|
||||
float* const lineBuffer = buffer;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int row = 0; row < H; ++row) {
|
||||
float len = radius + 1;
|
||||
float tempval = std::fabs(src[row][0]);
|
||||
lineBuffer[0] = tempval;
|
||||
for (int j = 1; j <= radius; j++) {
|
||||
tempval += std::fabs(src[row][j]);
|
||||
}
|
||||
|
||||
tempval /= len;
|
||||
dst[row][0] = tempval;
|
||||
|
||||
for (int col = 1; col <= radius; ++col) {
|
||||
lineBuffer[col] = std::fabs(src[row][col]);
|
||||
tempval = (tempval * len + std::fabs(src[row][col + radius])) / (len + 1);
|
||||
dst[row][col] = tempval;
|
||||
++len;
|
||||
}
|
||||
|
||||
const float rlen = 1.f / len;
|
||||
int pos = 0;
|
||||
for (int col = radius + 1; col < W - radius; ++col) {
|
||||
const float oldVal = lineBuffer[pos];
|
||||
lineBuffer[pos] = std::fabs(src[row][col]);
|
||||
tempval = tempval + (std::fabs(src[row][col + radius]) - oldVal) * rlen;
|
||||
dst[row][col] = tempval;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int col = W - radius; col < W; ++col) {
|
||||
tempval = (tempval * len - lineBuffer[pos]) / (len - 1);
|
||||
dst[row][col] = tempval;
|
||||
--len;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
//vertical blur
|
||||
float (* const rowBuffer)[numCols] = (float(*)[numCols]) buffer;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W; col += numCols) {
|
||||
float len = radius + 1;
|
||||
|
||||
for (int k = 0; k < numCols; ++k) {
|
||||
rowBuffer[0][k] = dst[0][col + k];
|
||||
}
|
||||
|
||||
for (int i = 1; i <= radius; ++i) {
|
||||
for (int k = 0; k < numCols; ++k) {
|
||||
dst[0][col + k] += dst[i][col + k];
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
rowBuffer[row][k] = dst[row][col + k];
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
|
||||
}
|
||||
|
||||
++len;
|
||||
}
|
||||
|
||||
const float rlen = 1.f / len;
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
float oldVal = rowBuffer[pos][k];
|
||||
rowBuffer[pos][k] = dst[row][col + k];
|
||||
dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen;
|
||||
}
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int row = H - radius; row < H; ++row) {
|
||||
for(int k = 0; k < numCols; ++k) {
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1);
|
||||
}
|
||||
--len;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void boxblur(float* src, float* dst, int radius, int W, int H, bool multiThread)
|
||||
{
|
||||
float* srcp[H];
|
||||
float* dstp[H];
|
||||
for (int i = 0; i < H; ++i) {
|
||||
srcp[i] = src + i * W;
|
||||
dstp[i] = dst + i * W;
|
||||
}
|
||||
boxblur(srcp, dstp, radius, W, H, multiThread);
|
||||
}
|
||||
|
||||
void boxabsblur(float* src, float* dst, int radius, int W, int H, bool multiThread)
|
||||
{
|
||||
float* srcp[H];
|
||||
float* dstp[H];
|
||||
for (int i = 0; i < H; ++i) {
|
||||
srcp[i] = src + i * W;
|
||||
dstp[i] = dst + i * W;
|
||||
}
|
||||
boxabsblur(srcp, dstp, radius, W, H, multiThread);
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* This file is part of RawTherapee.
|
||||
*
|
||||
* Copyright (C) 2010 Emil Martinec <ejmartin@uchicago.edu>
|
||||
* Copyright (C) 2019 Ingo Weyrich <heckflosse67@gmx.de>
|
||||
*
|
||||
* RawTherapee is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@ -15,873 +15,15 @@
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with RawTherapee. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef _BOXBLUR_H_
|
||||
#define _BOXBLUR_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <memory>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "alignedbuffer.h"
|
||||
#include "rt_math.h"
|
||||
#include "opthelper.h"
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
namespace rtengine
|
||||
{
|
||||
|
||||
// classical filtering if the support window is small:
|
||||
|
||||
template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, int W, int H)
|
||||
{
|
||||
//box blur image; box range = (radx,rady)
|
||||
assert(2*radx+1 < W);
|
||||
assert(2*rady+1 < H);
|
||||
|
||||
AlignedBuffer<float>* buffer = new AlignedBuffer<float> (W * H);
|
||||
float* temp = buffer->data;
|
||||
|
||||
if (radx == 0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
temp[row * W + col] = (float)src[row][col];
|
||||
}
|
||||
} else {
|
||||
//horizontal blur
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
temp[row * W + 0] = (float)src[row][0] / len;
|
||||
|
||||
for (int j = 1; j <= radx; j++) {
|
||||
temp[row * W + 0] += (float)src[row][j] / len;
|
||||
}
|
||||
|
||||
for (int col = 1; col <= radx; col++) {
|
||||
temp[row * W + col] = (temp[row * W + col - 1] * len + (float)src[row][col + radx]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int col = radx + 1; col < W - radx; col++) {
|
||||
temp[row * W + col] = temp[row * W + col - 1] + ((float)(src[row][col + radx] - src[row][col - radx - 1])) / len;
|
||||
}
|
||||
|
||||
for (int col = W - radx; col < W; col++) {
|
||||
temp[row * W + col] = (temp[row * W + col - 1] * len - src[row][col - radx - 1]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rady == 0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
dst[row][col] = temp[row * W + col];
|
||||
}
|
||||
} else {
|
||||
//vertical blur
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0][col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0][col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row][col] = dst[(row - 1)][col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete buffer;
|
||||
void boxblur(float** src, float** dst, int radius, int W, int H, bool multiThread);
|
||||
void boxblur(float* src, float* dst, int radius, int W, int H, bool multiThread);
|
||||
void boxabsblur(float** src, float** dst, int radius, int W, int H, bool multiThread);
|
||||
void boxabsblur(float* src, float* dst, int radius, int W, int H, bool multiThread);
|
||||
|
||||
}
|
||||
|
||||
template<class T, class A> void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H)
|
||||
{
|
||||
//box blur image; box range = (radx,rady)
|
||||
|
||||
float* temp = buffer;
|
||||
|
||||
if (radx == 0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
temp[row * W + col] = (float)src[row][col];
|
||||
}
|
||||
} else {
|
||||
//horizontal blur
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++) {
|
||||
float len = radx + 1;
|
||||
float tempval = (float)src[row][0];
|
||||
|
||||
for (int j = 1; j <= radx; j++) {
|
||||
tempval += (float)src[row][j];
|
||||
}
|
||||
|
||||
tempval /= len;
|
||||
temp[row * W + 0] = tempval;
|
||||
|
||||
for (int col = 1; col <= radx; col++) {
|
||||
temp[row * W + col] = tempval = (tempval * len + (float)src[row][col + radx]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int col = radx + 1; col < W - radx; col++) {
|
||||
temp[row * W + col] = tempval = tempval + ((float)(src[row][col + radx] - src[row][col - radx - 1])) / len;
|
||||
}
|
||||
|
||||
for (int col = W - radx; col < W; col++) {
|
||||
temp[row * W + col] = tempval = (tempval * len - src[row][col - radx - 1]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rady == 0) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
dst[row][col] = temp[row * W + col];
|
||||
}
|
||||
} else {
|
||||
const int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache
|
||||
#ifdef __SSE2__
|
||||
vfloat leninitv = F2V( (float)(rady + 1));
|
||||
vfloat onev = F2V( 1.f );
|
||||
vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - 7; col += 8) {
|
||||
lenv = leninitv;
|
||||
tempv = LVFU(temp[0 * W + col]);
|
||||
temp1v = LVFU(temp[0 * W + col + 4]);
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
tempv = tempv + LVFU(temp[i * W + col]);
|
||||
temp1v = temp1v + LVFU(temp[i * W + col + 4]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
temp1v = temp1v / lenv;
|
||||
STVFU(dst[0][col], tempv);
|
||||
STVFU(dst[0][col + 4], temp1v);
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
|
||||
temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
|
||||
temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
|
||||
temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenm1v;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
//vertical blur
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - numCols + 1; col += 8) {
|
||||
float len = rady + 1;
|
||||
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[0][col + k] = temp[0 * W + col + k];
|
||||
}
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[0][col + k] += temp[i * W + col + k];
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[row][col + k] = (dst[(row - 1)][col + k] * len + temp[(row + rady) * W + col + k]) / (len + 1);
|
||||
}
|
||||
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[row][col + k] = dst[(row - 1)][col + k] + (temp[(row + rady) * W + col + k] - temp[(row - rady - 1) * W + col + k]) / len;
|
||||
}
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[row][col + k] = (dst[(row - 1)][col + k] * len - temp[(row - rady - 1) * W + col + k]) / (len - 1);
|
||||
}
|
||||
|
||||
len --;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
|
||||
for (int col = W - (W % numCols); col < W; col++) {
|
||||
float len = rady + 1;
|
||||
dst[0][col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0][col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row][col] = (dst[(row - 1)][col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row][col] = dst[(row - 1)][col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row][col] = (dst[(row - 1)][col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
inline void boxblur (float** src, float** dst, int radius, int W, int H, bool multiThread)
|
||||
{
|
||||
//box blur using rowbuffers and linebuffers instead of a full size buffer
|
||||
|
||||
if (radius == 0) {
|
||||
if (src != dst) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for if (multiThread)
|
||||
#endif
|
||||
|
||||
for (int row = 0; row < H; row++) {
|
||||
for (int col = 0; col < W; col++) {
|
||||
dst[row][col] = src[row][col];
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
constexpr int numCols = 8; // process numCols columns at once for better usage of L1 cpu cache
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
{
|
||||
std::unique_ptr<float[]> buffer(new float[numCols * (radius + 1)]);
|
||||
|
||||
//horizontal blur
|
||||
float* const lineBuffer = buffer.get();
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int row = 0; row < H; row++) {
|
||||
float len = radius + 1;
|
||||
float tempval = src[row][0];
|
||||
lineBuffer[0] = tempval;
|
||||
for (int j = 1; j <= radius; j++) {
|
||||
tempval += src[row][j];
|
||||
}
|
||||
|
||||
tempval /= len;
|
||||
dst[row][0] = tempval;
|
||||
|
||||
for (int col = 1; col <= radius; col++) {
|
||||
lineBuffer[col] = src[row][col];
|
||||
tempval = (tempval * len + src[row][col + radius]) / (len + 1);
|
||||
dst[row][col] = tempval;
|
||||
++len;
|
||||
}
|
||||
int pos = 0;
|
||||
for (int col = radius + 1; col < W - radius; col++) {
|
||||
const float oldVal = lineBuffer[pos];
|
||||
lineBuffer[pos] = src[row][col];
|
||||
tempval = tempval + (src[row][col + radius] - oldVal) / len;
|
||||
dst[row][col] = tempval;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int col = W - radius; col < W; col++) {
|
||||
tempval = (tempval * len - lineBuffer[pos]) / (len - 1);
|
||||
dst[row][col] = tempval;
|
||||
--len;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
//vertical blur
|
||||
#ifdef __SSE2__
|
||||
vfloat (* const rowBuffer)[2] = (vfloat(*)[2]) buffer.get();
|
||||
const vfloat leninitv = F2V(radius + 1);
|
||||
const vfloat onev = F2V(1.f);
|
||||
vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - 7; col += 8) {
|
||||
lenv = leninitv;
|
||||
tempv = LVFU(dst[0][col]);
|
||||
temp1v = LVFU(dst[0][col + 4]);
|
||||
rowBuffer[0][0] = tempv;
|
||||
rowBuffer[0][1] = temp1v;
|
||||
|
||||
for (int i = 1; i <= radius; i++) {
|
||||
tempv = tempv + LVFU(dst[i][col]);
|
||||
temp1v = temp1v + LVFU(dst[i][col + 4]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
temp1v = temp1v / lenv;
|
||||
STVFU(dst[0][col], tempv);
|
||||
STVFU(dst[0][col + 4], temp1v);
|
||||
|
||||
for (int row = 1; row <= radius; row++) {
|
||||
rowBuffer[row][0] = LVFU(dst[row][col]);
|
||||
rowBuffer[row][1] = LVFU(dst[row][col + 4]);
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVFU(dst[row + radius][col])) / lenp1v;
|
||||
temp1v = (temp1v * lenv + LVFU(dst[row + radius][col + 4])) / lenp1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; row++) {
|
||||
vfloat oldVal0 = rowBuffer[pos][0];
|
||||
vfloat oldVal1 = rowBuffer[pos][1];
|
||||
rowBuffer[pos][0] = LVFU(dst[row][col]);
|
||||
rowBuffer[pos][1] = LVFU(dst[row][col + 4]);
|
||||
tempv = tempv + (LVFU(dst[row + radius][col]) - oldVal0) * rlenv ;
|
||||
temp1v = temp1v + (LVFU(dst[row + radius][col + 4]) - oldVal1) * rlenv ;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int row = H - radius; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - rowBuffer[pos][0]) / lenm1v;
|
||||
temp1v = (temp1v * lenv - rowBuffer[pos][1]) / lenm1v;
|
||||
STVFU(dst[row][col], tempv);
|
||||
STVFU(dst[row][col + 4], temp1v);
|
||||
lenv = lenm1v;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
float (* const rowBuffer)[8] = (float(*)[8]) buffer.get();
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
|
||||
for (int col = 0; col < W - numCols + 1; col += 8) {
|
||||
float len = radius + 1;
|
||||
|
||||
for (int k = 0; k < numCols; k++) {
|
||||
rowBuffer[0][k] = dst[0][col + k];
|
||||
}
|
||||
|
||||
for (int i = 1; i <= radius; i++) {
|
||||
for (int k = 0; k < numCols; k++) {
|
||||
dst[0][col + k] += dst[i][col + k];
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= radius; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
rowBuffer[row][k] = dst[row][col + k];
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
|
||||
}
|
||||
|
||||
len ++;
|
||||
}
|
||||
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
float oldVal = rowBuffer[pos][k];
|
||||
rowBuffer[pos][k] = dst[row][col + k];
|
||||
dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) / len;
|
||||
}
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
|
||||
for (int row = H - radius; row < H; row++) {
|
||||
for(int k = 0; k < numCols; k++) {
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len - rowBuffer[pos][k]) / (len - 1);
|
||||
}
|
||||
len --;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
//vertical blur, remaining columns
|
||||
#ifdef _OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
{
|
||||
const int remaining = W % numCols;
|
||||
|
||||
if (remaining > 0) {
|
||||
float (* const rowBuffer)[8] = (float(*)[8]) buffer.get();
|
||||
const int col = W - remaining;
|
||||
|
||||
float len = radius + 1;
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
rowBuffer[0][k] = dst[0][col + k];
|
||||
}
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[0][col + k] += dst[row][col + k];
|
||||
}
|
||||
}
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[0][col + k] /= len;
|
||||
}
|
||||
for (int row = 1; row <= radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
rowBuffer[row][k] = dst[row][col + k];
|
||||
dst[row][col + k] = (dst[row - 1][col + k] * len + dst[row + radius][col + k]) / (len + 1);
|
||||
}
|
||||
len ++;
|
||||
}
|
||||
const float rlen = 1.f / len;
|
||||
int pos = 0;
|
||||
for (int row = radius + 1; row < H - radius; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
float oldVal = rowBuffer[pos][k];
|
||||
rowBuffer[pos][k] = dst[row][col + k];
|
||||
dst[row][col + k] = dst[row - 1][col + k] + (dst[row + radius][col + k] - oldVal) * rlen;
|
||||
}
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
for (int row = H - radius; row < H; ++row) {
|
||||
for(int k = 0; k < remaining; ++k) {
|
||||
dst[row][col + k] = (dst[(row - 1)][col + k] * len - rowBuffer[pos][k]) / (len - 1);
|
||||
}
|
||||
len --;
|
||||
++pos;
|
||||
pos = pos <= radius ? pos : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H)
|
||||
{
|
||||
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
|
||||
|
||||
float* temp = buffer;
|
||||
|
||||
if (radx == 0) {
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
temp[row * W + col] = src[row * W + col];
|
||||
}
|
||||
} else {
|
||||
//horizontal blur
|
||||
for (int row = H - 1; row >= 0; row--) {
|
||||
int len = radx + 1;
|
||||
float tempval = (float)src[row * W];
|
||||
|
||||
for (int j = 1; j <= radx; j++) {
|
||||
tempval += (float)src[row * W + j];
|
||||
}
|
||||
|
||||
tempval = tempval / len;
|
||||
temp[row * W] = tempval;
|
||||
|
||||
for (int col = 1; col <= radx; col++) {
|
||||
tempval = (tempval * len + src[row * W + col + radx]) / (len + 1);
|
||||
temp[row * W + col] = tempval;
|
||||
len ++;
|
||||
}
|
||||
|
||||
float reclen = 1.f / len;
|
||||
|
||||
for (int col = radx + 1; col < W - radx; col++) {
|
||||
tempval = tempval + ((float)(src[row * W + col + radx] - src[row * W + col - radx - 1])) * reclen;
|
||||
temp[row * W + col] = tempval;
|
||||
}
|
||||
|
||||
for (int col = W - radx; col < W; col++) {
|
||||
tempval = (tempval * len - src[row * W + col - radx - 1]) / (len - 1);
|
||||
temp[row * W + col] = tempval;
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rady == 0) {
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
dst[row * W + col] = temp[row * W + col];
|
||||
}
|
||||
} else {
|
||||
//vertical blur
|
||||
#ifdef __SSE2__
|
||||
vfloat leninitv = F2V( (float)(rady + 1));
|
||||
vfloat onev = F2V( 1.f );
|
||||
vfloat tempv, temp1v, lenv, lenp1v, lenm1v, rlenv;
|
||||
int col;
|
||||
|
||||
for (col = 0; col < W - 7; col += 8) {
|
||||
lenv = leninitv;
|
||||
tempv = LVFU(temp[0 * W + col]);
|
||||
temp1v = LVFU(temp[0 * W + col + 4]);
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
tempv = tempv + LVFU(temp[i * W + col]);
|
||||
temp1v = temp1v + LVFU(temp[i * W + col + 4]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
temp1v = temp1v / lenv;
|
||||
STVFU(dst[0 * W + col], tempv);
|
||||
STVFU(dst[0 * W + col + 4], temp1v);
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
|
||||
temp1v = (temp1v * lenv + LVFU(temp[(row + rady) * W + col + 4])) / lenp1v;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
STVFU(dst[row * W + col + 4], temp1v);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
|
||||
temp1v = temp1v + (LVFU(temp[(row + rady) * W + col + 4]) - LVFU(temp[(row - rady - 1) * W + col + 4])) * rlenv ;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
STVFU(dst[row * W + col + 4], temp1v);
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
|
||||
temp1v = (temp1v * lenv - LVFU(temp[(row - rady - 1) * W + col + 4])) / lenm1v;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
STVFU(dst[row * W + col + 4], temp1v);
|
||||
lenv = lenm1v;
|
||||
}
|
||||
}
|
||||
|
||||
for (; col < W - 3; col += 4) {
|
||||
lenv = leninitv;
|
||||
tempv = LVFU(temp[0 * W + col]);
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
tempv = tempv + LVFU(temp[i * W + col]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
STVFU(dst[0 * W + col], tempv);
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVFU(temp[(row + rady) * W + col])) / lenp1v;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
tempv = tempv + (LVFU(temp[(row + rady) * W + col]) - LVFU(temp[(row - rady - 1) * W + col])) * rlenv ;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - LVFU(temp[(row - rady - 1) * W + col])) / lenm1v;
|
||||
STVFU(dst[row * W + col], tempv);
|
||||
lenv = lenm1v;
|
||||
}
|
||||
}
|
||||
|
||||
for (; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0 * W + col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0 * W + col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0 * W + col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0 * W + col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp)
|
||||
{
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
|
||||
|
||||
if (radx == 0) {
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
temp[row * W + col] = fabs(src[row * W + col]);
|
||||
}
|
||||
} else {
|
||||
//horizontal blur
|
||||
for (int row = 0; row < H; row++) {
|
||||
int len = radx + 1;
|
||||
float tempval = fabsf((float)src[row * W + 0]);
|
||||
|
||||
for (int j = 1; j <= radx; j++) {
|
||||
tempval += fabsf((float)src[row * W + j]);
|
||||
}
|
||||
|
||||
tempval /= len;
|
||||
temp[row * W + 0] = tempval;
|
||||
|
||||
for (int col = 1; col <= radx; col++) {
|
||||
tempval = (tempval * len + fabsf(src[row * W + col + radx])) / (len + 1);
|
||||
temp[row * W + col] = tempval;
|
||||
len ++;
|
||||
}
|
||||
|
||||
float rlen = 1.f / (float)len;
|
||||
|
||||
for (int col = radx + 1; col < W - radx; col++) {
|
||||
tempval = tempval + ((float)(fabsf(src[row * W + col + radx]) - fabsf(src[row * W + col - radx - 1]))) * rlen;
|
||||
temp[row * W + col] = tempval;
|
||||
}
|
||||
|
||||
for (int col = W - radx; col < W; col++) {
|
||||
tempval = (tempval * len - fabsf(src[row * W + col - radx - 1])) / (len - 1);
|
||||
temp[row * W + col] = tempval;
|
||||
len --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rady == 0) {
|
||||
for (int row = 0; row < H; row++)
|
||||
for (int col = 0; col < W; col++) {
|
||||
dst[row * W + col] = temp[row * W + col];
|
||||
}
|
||||
} else {
|
||||
//vertical blur
|
||||
#ifdef __SSE2__
|
||||
vfloat leninitv = F2V( (float)(rady + 1));
|
||||
vfloat onev = F2V( 1.f );
|
||||
vfloat tempv, lenv, lenp1v, lenm1v, rlenv;
|
||||
|
||||
for (int col = 0; col < W - 3; col += 4) {
|
||||
lenv = leninitv;
|
||||
tempv = LVF(temp[0 * W + col]);
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
tempv = tempv + LVF(temp[i * W + col]);
|
||||
}
|
||||
|
||||
tempv = tempv / lenv;
|
||||
STVF(dst[0 * W + col], tempv);
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
lenp1v = lenv + onev;
|
||||
tempv = (tempv * lenv + LVF(temp[(row + rady) * W + col])) / lenp1v;
|
||||
STVF(dst[row * W + col], tempv);
|
||||
lenv = lenp1v;
|
||||
}
|
||||
|
||||
rlenv = onev / lenv;
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
tempv = tempv + (LVF(temp[(row + rady) * W + col]) - LVF(temp[(row - rady - 1) * W + col])) * rlenv;
|
||||
STVF(dst[row * W + col], tempv);
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
lenm1v = lenv - onev;
|
||||
tempv = (tempv * lenv - LVF(temp[(row - rady - 1) * W + col])) / lenm1v;
|
||||
STVF(dst[row * W + col], tempv);
|
||||
lenv = lenm1v;
|
||||
}
|
||||
}
|
||||
|
||||
for (int col = W - (W % 4); col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0 * W + col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0 * W + col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
for (int col = 0; col < W; col++) {
|
||||
int len = rady + 1;
|
||||
dst[0 * W + col] = temp[0 * W + col] / len;
|
||||
|
||||
for (int i = 1; i <= rady; i++) {
|
||||
dst[0 * W + col] += temp[i * W + col] / len;
|
||||
}
|
||||
|
||||
for (int row = 1; row <= rady; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len + temp[(row + rady) * W + col]) / (len + 1);
|
||||
len ++;
|
||||
}
|
||||
|
||||
for (int row = rady + 1; row < H - rady; row++) {
|
||||
dst[row * W + col] = dst[(row - 1) * W + col] + (temp[(row + rady) * W + col] - temp[(row - rady - 1) * W + col]) / len;
|
||||
}
|
||||
|
||||
for (int row = H - rady; row < H; row++) {
|
||||
dst[row * W + col] = (dst[(row - 1) * W + col] * len - temp[(row - rady - 1) * W + col]) / (len - 1);
|
||||
len --;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* _BOXBLUR_H_ */
|
||||
|
@ -1349,7 +1349,7 @@ template<class T> void gaussVerticalmult (T** src, T** dst, const int W, const i
|
||||
}
|
||||
#endif
|
||||
|
||||
template<class T> void gaussianBlurImpl(T** src, T** dst, const int W, const int H, const double sigma, T *buffer = nullptr, eGaussType gausstype = GAUSS_STANDARD, T** buffer2 = nullptr)
|
||||
template<class T> void gaussianBlurImpl(T** src, T** dst, const int W, const int H, const double sigma, bool useBoxBlur, eGaussType gausstype = GAUSS_STANDARD, T** buffer2 = nullptr)
|
||||
{
|
||||
static constexpr auto GAUSS_SKIP = 0.25;
|
||||
static constexpr auto GAUSS_3X3_LIMIT = 0.6;
|
||||
@ -1357,7 +1357,7 @@ template<class T> void gaussianBlurImpl(T** src, T** dst, const int W, const int
|
||||
static constexpr auto GAUSS_7X7_LIMIT = 1.15;
|
||||
static constexpr auto GAUSS_DOUBLE = 25.0;
|
||||
|
||||
if(buffer) {
|
||||
if (useBoxBlur) {
|
||||
// special variant for very large sigma, currently only used by retinex algorithm
|
||||
// use iterated boxblur to approximate gaussian blur
|
||||
// Compute ideal averaging filter width and number of iterations
|
||||
@ -1393,10 +1393,10 @@ template<class T> void gaussianBlurImpl(T** src, T** dst, const int W, const int
|
||||
sizes[i] = ((i < m ? wl : wu) - 1) / 2;
|
||||
}
|
||||
|
||||
rtengine::boxblur(src, dst, buffer, sizes[0], sizes[0], W, H);
|
||||
rtengine::boxblur(src, dst, sizes[0], W, H, true);
|
||||
|
||||
for(int i = 1; i < n; i++) {
|
||||
rtengine::boxblur(dst, dst, buffer, sizes[i], sizes[i], W, H);
|
||||
rtengine::boxblur(dst, dst, sizes[i], W, H, true);
|
||||
}
|
||||
} else {
|
||||
if (sigma < GAUSS_SKIP) {
|
||||
@ -1532,8 +1532,8 @@ template<class T> void gaussianBlurImpl(T** src, T** dst, const int W, const int
|
||||
}
|
||||
}
|
||||
|
||||
void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, float *buffer, eGaussType gausstype, float** buffer2)
|
||||
void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, bool useBoxBlur, eGaussType gausstype, float** buffer2)
|
||||
{
|
||||
gaussianBlurImpl<float>(src, dst, W, H, sigma, buffer, gausstype, buffer2);
|
||||
gaussianBlurImpl<float>(src, dst, W, H, sigma, useBoxBlur, gausstype, buffer2);
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,6 @@
|
||||
|
||||
enum eGaussType {GAUSS_STANDARD, GAUSS_MULT, GAUSS_DIV};
|
||||
|
||||
void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, float *buffer = nullptr, eGaussType gausstype = GAUSS_STANDARD, float** buffer2 = nullptr);
|
||||
void gaussianBlur(float** src, float** dst, const int W, const int H, const double sigma, bool useBoxBlur = false, eGaussType gausstype = GAUSS_STANDARD, float** buffer2 = nullptr);
|
||||
|
||||
#endif
|
@ -105,7 +105,7 @@ void guidedFilter(const array2D<float> &guide, const array2D<float> &src, array2
|
||||
[multithread](array2D<float> &d, array2D<float> &s, int rad) -> void
|
||||
{
|
||||
rad = LIM(rad, 0, (min(s.width(), s.height()) - 1) / 2 - 1);
|
||||
boxblur(s, d, rad, s.width(), s.height(), multithread);
|
||||
boxblur(static_cast<float**>(s), static_cast<float**>(d), rad, s.width(), s.height(), multithread);
|
||||
};
|
||||
|
||||
const int W = src.width();
|
||||
|
@ -198,20 +198,20 @@ public:
|
||||
void Median_Denoise(float **src, float **dst, float upperBound, int width, int height, Median medianType, int iterations, int numThreads, float **buffer = nullptr);
|
||||
void RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi);
|
||||
void RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, const bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope);
|
||||
void RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &minredaut, float & minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread = false);
|
||||
void RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer); //for DCT
|
||||
void RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, bool isRAW, const LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float & maxblueaut, float &minredaut, float & minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread = false);
|
||||
void RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail); //for DCT
|
||||
void RGBoutput_tile_row(float *bloxrow_L, float ** Ldetail, float ** tilemask_out, int height, int width, int top);
|
||||
bool WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge);
|
||||
bool WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb);
|
||||
void WaveletDenoiseAll_info(int levwav, wavelet_decomposition &WaveletCoeffs_a,
|
||||
wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float & minblueaut, int schoice, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc,
|
||||
bool WaveletDenoiseAllL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3], float * vari, int edge);
|
||||
bool WaveletDenoiseAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb);
|
||||
void WaveletDenoiseAll_info(int levwav, const wavelet_decomposition &WaveletCoeffs_a,
|
||||
const wavelet_decomposition &WaveletCoeffs_b, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float & minblueaut, int schoice, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc,
|
||||
float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb);
|
||||
|
||||
bool WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]);
|
||||
bool WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab,
|
||||
bool WaveletDenoiseAll_BiShrinkL(const wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3]);
|
||||
bool WaveletDenoiseAll_BiShrinkAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float *noisevarchrom, float madL[8][3], float noisevar_ab,
|
||||
const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb);
|
||||
void ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge);
|
||||
void ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
||||
void ShrinkAllL(const wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir, float *noisevarlum, float * madL, float * vari, int edge);
|
||||
void ShrinkAllAB(const wavelet_decomposition &WaveletCoeffs_L, const wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
||||
float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb, float * madL, float * madaab = nullptr, bool madCalculated = false);
|
||||
void ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
|
||||
int W_ab, int H_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc,
|
||||
|
@ -43,6 +43,7 @@
|
||||
|
||||
#include "gauss.h"
|
||||
#include "improcfun.h"
|
||||
#include "jaggedarray.h"
|
||||
#include "median.h"
|
||||
#include "opthelper.h"
|
||||
#include "procparams.h"
|
||||
@ -50,8 +51,6 @@
|
||||
#include "rtengine.h"
|
||||
#include "StopWatch.h"
|
||||
|
||||
#define clipretinex( val, minv, maxv ) (( val = (val < minv ? minv : val ) ) > maxv ? maxv : val )
|
||||
|
||||
namespace
|
||||
{
|
||||
void retinex_scales( float* scales, int nscales, int mode, int s, float high)
|
||||
@ -138,35 +137,33 @@ namespace rtengine
|
||||
|
||||
extern const Settings* settings;
|
||||
|
||||
void RawImageSource::MSR(float** luminance, float** originalLuminance, float **exLuminance, LUTf & mapcurve, bool &mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax)
|
||||
void RawImageSource::MSR(float** luminance, float** originalLuminance, float **exLuminance, const LUTf& mapcurve, bool mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax)
|
||||
{
|
||||
BENCHFUN
|
||||
if (!deh.enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (deh.enabled) {//enabled
|
||||
float maxtr, mintr;
|
||||
constexpr float eps = 2.f;
|
||||
bool useHsl = deh.retinexcolorspace == "HSLLOG";
|
||||
bool useHslLin = deh.retinexcolorspace == "HSLLIN";
|
||||
float offse = (float) deh.offs; //def = 0 not use
|
||||
int iter = deh.iter;
|
||||
int gradient = deh.scal;
|
||||
int scal = 3;//disabled scal
|
||||
int nei = (int) (2.8f * deh.neigh); //def = 220
|
||||
float vart = (float)deh.vart / 100.f;//variance
|
||||
float gradvart = (float)deh.grad;
|
||||
float gradstr = (float)deh.grads;
|
||||
float strength = (float) deh.str / 100.f; // Blend with original L channel data
|
||||
float limD = (float) deh.limd;
|
||||
const bool useHsl = deh.retinexcolorspace == "HSLLOG";
|
||||
const bool useHslLin = deh.retinexcolorspace == "HSLLIN";
|
||||
const float offse = deh.offs; //def = 0 not use
|
||||
const int iter = deh.iter;
|
||||
const int gradient = deh.scal;
|
||||
int scal = deh.skal;
|
||||
const int nei = 2.8f * deh.neigh; //def = 220
|
||||
const float vart = deh.vart / 100.f;//variance
|
||||
const float gradvart = deh.grad;
|
||||
const float gradstr = deh.grads;
|
||||
const float strength = deh.str / 100.f; // Blend with original L channel data
|
||||
float limD = deh.limd;
|
||||
limD = pow(limD, 1.7f);//about 2500 enough
|
||||
limD *= useHslLin ? 10.f : 1.f;
|
||||
float ilimD = 1.f / limD;
|
||||
float hig = ((float) deh.highl) / 100.f;
|
||||
scal = deh.skal;
|
||||
const float ilimD = 1.f / limD;
|
||||
const float hig = deh.highl / 100.f;
|
||||
|
||||
int H_L = height;
|
||||
int W_L = width;
|
||||
|
||||
float *tran[H_L] ALIGNED16;
|
||||
float *tranBuffer = nullptr;
|
||||
const int H_L = height;
|
||||
const int W_L = width;
|
||||
|
||||
constexpr float elogt = 2.71828f;
|
||||
bool lhutili = false;
|
||||
@ -199,30 +196,29 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
constexpr float aahi = 49.f / 99.f; ////reduce sensibility 50%
|
||||
constexpr float bbhi = 1.f - aahi;
|
||||
|
||||
for(int it = 1; it < iter + 1; it++) { //iter nb max of iterations
|
||||
float high = bbhi + aahi * (float) deh.highl;
|
||||
|
||||
for (int it = 1; it < iter + 1; it++) { //iter nb max of iterations
|
||||
float grad = 1.f;
|
||||
float sc = scal;
|
||||
|
||||
if(gradient == 0) {
|
||||
if (gradient == 0) {
|
||||
grad = 1.f;
|
||||
sc = 3.f;
|
||||
} else if(gradient == 1) {
|
||||
} else if (gradient == 1) {
|
||||
grad = 0.25f * it + 0.75f;
|
||||
sc = -0.5f * it + 4.5f;
|
||||
} else if(gradient == 2) {
|
||||
} else if (gradient == 2) {
|
||||
grad = 0.5f * it + 0.5f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 3) {
|
||||
} else if (gradient == 3) {
|
||||
grad = 0.666f * it + 0.333f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 4) {
|
||||
} else if (gradient == 4) {
|
||||
grad = 0.8f * it + 0.2f;
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 5) {
|
||||
if(moderetinex != 3) {
|
||||
} else if (gradient == 5) {
|
||||
if (moderetinex != 3) {
|
||||
grad = 2.5f * it - 1.5f;
|
||||
} else {
|
||||
float aa = (11.f * high - 1.f) / 4.f;
|
||||
@ -231,8 +227,8 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
|
||||
sc = -0.75f * it + 5.75f;
|
||||
} else if(gradient == 6) {
|
||||
if(moderetinex != 3) {
|
||||
} else if (gradient == 6) {
|
||||
if (moderetinex != 3) {
|
||||
grad = 5.f * it - 4.f;
|
||||
} else {
|
||||
float aa = (21.f * high - 1.f) / 4.f;
|
||||
@ -241,26 +237,21 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
|
||||
sc = -0.75f * it + 5.75f;
|
||||
}
|
||||
|
||||
else if(gradient == -1) {
|
||||
} else if (gradient == -1) {
|
||||
grad = -0.125f * it + 1.125f;
|
||||
sc = 3.f;
|
||||
}
|
||||
|
||||
if(iter == 1) {
|
||||
if (iter == 1) {
|
||||
sc = scal;
|
||||
} else {
|
||||
//adjust sc in function of choice of scale by user if iterations
|
||||
if(scal < 3) {
|
||||
if (scal < 3) {
|
||||
sc -= 1;
|
||||
|
||||
if(sc < 1.f) {//avoid 0
|
||||
if (sc < 1.f) {//avoid 0
|
||||
sc = 1.f;
|
||||
}
|
||||
}
|
||||
|
||||
if(scal > 4) {
|
||||
} else if (scal > 4) {
|
||||
sc += 1;
|
||||
}
|
||||
}
|
||||
@ -269,20 +260,20 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
float limdx = limD;
|
||||
float ilimdx = ilimD;
|
||||
|
||||
if(gradvart != 0) {
|
||||
if(gradvart == 1) {
|
||||
if (gradvart != 0) {
|
||||
if (gradvart == 1) {
|
||||
varx = vart * (-0.125f * it + 1.125f);
|
||||
limdx = limD * (-0.125f * it + 1.125f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == 2) {
|
||||
} else if (gradvart == 2) {
|
||||
varx = vart * (-0.2f * it + 1.2f);
|
||||
limdx = limD * (-0.2f * it + 1.2f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == -1) {
|
||||
} else if (gradvart == -1) {
|
||||
varx = vart * (0.125f * it + 0.875f);
|
||||
limdx = limD * (0.125f * it + 0.875f);
|
||||
ilimdx = 1.f / limdx;
|
||||
} else if(gradvart == -2) {
|
||||
} else if (gradvart == -2) {
|
||||
varx = vart * (0.4f * it + 0.6f);
|
||||
limdx = limD * (0.4f * it + 0.6f);
|
||||
ilimdx = 1.f / limdx;
|
||||
@ -292,27 +283,27 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
scal = round(sc);
|
||||
float ks = 1.f;
|
||||
|
||||
if(gradstr != 0) {
|
||||
if(gradstr == 1) {
|
||||
if(it <= 3) {
|
||||
if (gradstr != 0) {
|
||||
if (gradstr == 1) {
|
||||
if (it <= 3) {
|
||||
ks = -0.3f * it + 1.6f;
|
||||
} else {
|
||||
ks = 0.5f;
|
||||
}
|
||||
} else if(gradstr == 2) {
|
||||
if(it <= 3) {
|
||||
} else if (gradstr == 2) {
|
||||
if (it <= 3) {
|
||||
ks = -0.6f * it + 2.2f;
|
||||
} else {
|
||||
ks = 0.3f;
|
||||
}
|
||||
} else if(gradstr == -1) {
|
||||
if(it <= 3) {
|
||||
} else if (gradstr == -1) {
|
||||
if (it <= 3) {
|
||||
ks = 0.2f * it + 0.6f;
|
||||
} else {
|
||||
ks = 1.2f;
|
||||
}
|
||||
} else if(gradstr == -2) {
|
||||
if(it <= 3) {
|
||||
} else if (gradstr == -2) {
|
||||
if (it <= 3) {
|
||||
ks = 0.4f * it + 0.2f;
|
||||
} else {
|
||||
ks = 1.5f;
|
||||
@ -320,24 +311,15 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
|
||||
float strengthx = ks * strength;
|
||||
const float strengthx = ks * strength;
|
||||
|
||||
constexpr auto maxRetinexScales = 8;
|
||||
float RetinexScales[maxRetinexScales];
|
||||
|
||||
retinex_scales( RetinexScales, scal, moderetinex, nei / grad, high );
|
||||
retinex_scales(RetinexScales, scal, moderetinex, nei / grad, high);
|
||||
|
||||
float *src[H_L] ALIGNED16;
|
||||
float *srcBuffer = new float[H_L * W_L];
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
src[i] = &srcBuffer[i * W_L];
|
||||
}
|
||||
|
||||
int h_th = 0, s_th = 0;
|
||||
|
||||
int shHighlights = deh.highlights;
|
||||
int shShadows = deh.shadows;
|
||||
const int shHighlights = deh.highlights;
|
||||
const int shShadows = deh.shadows;
|
||||
|
||||
int mapmet = 0;
|
||||
|
||||
@ -363,6 +345,9 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
viewmet = 4;
|
||||
}
|
||||
|
||||
std::unique_ptr<JaggedArray<float>> srcBuffer(new JaggedArray<float>(W_L, H_L));
|
||||
float** src = *(srcBuffer.get());
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
@ -373,20 +358,8 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
luminance[i][j] = 0.f;
|
||||
}
|
||||
|
||||
float *out[H_L] ALIGNED16;
|
||||
float *outBuffer = new float[H_L * W_L];
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
out[i] = &outBuffer[i * W_L];
|
||||
}
|
||||
|
||||
if(viewmet == 3 || viewmet == 2) {
|
||||
tranBuffer = new float[H_L * W_L];
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
tran[i] = &tranBuffer[i * W_L];
|
||||
}
|
||||
}
|
||||
JaggedArray<float> out(W_L, H_L);
|
||||
JaggedArray<float>& tran = out; // tran and out can safely use the same buffer
|
||||
|
||||
const float logBetaGain = xlogf(16384.f);
|
||||
float pond = logBetaGain / (float) scal;
|
||||
@ -395,24 +368,24 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
pond /= log(elogt);
|
||||
}
|
||||
|
||||
auto shmap = ((mapmet == 2 || mapmet == 3 || mapmet == 4) && it == 1) ? new SHMap (W_L, H_L) : nullptr;
|
||||
std::unique_ptr<SHMap> shmap;
|
||||
if (((mapmet == 2 || mapmet == 3 || mapmet == 4) && it == 1)) {
|
||||
shmap.reset(new SHMap(W_L, H_L));
|
||||
}
|
||||
|
||||
float *buffer = new float[W_L * H_L];;
|
||||
std::unique_ptr<float[]> buffer;
|
||||
if (mapmet > 0) {
|
||||
buffer.reset(new float[W_L * H_L]);
|
||||
}
|
||||
|
||||
for ( int scale = scal - 1; scale >= 0; scale-- ) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
if(scale == scal - 1)
|
||||
{
|
||||
gaussianBlur (src, out, W_L, H_L, RetinexScales[scale], buffer);
|
||||
for (int scale = scal - 1; scale >= 0; --scale) {
|
||||
if (scale == scal - 1) {
|
||||
gaussianBlur(src, out, W_L, H_L, RetinexScales[scale], true);
|
||||
} else { // reuse result of last iteration
|
||||
// out was modified in last iteration => restore it
|
||||
if((((mapmet == 2 && scale > 1) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1)
|
||||
{
|
||||
if((((mapmet == 2 && scale > 1) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
@ -422,13 +395,13 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
|
||||
gaussianBlur (out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), buffer);
|
||||
gaussianBlur(out, out, W_L, H_L, sqrtf(SQR(RetinexScales[scale]) - SQR(RetinexScales[scale + 1])), true);
|
||||
}
|
||||
if((((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1 && scale > 0)
|
||||
{
|
||||
// out will be modified => store it for use in next iteration. We even don't need a new buffer because 'buffer' is free after gaussianBlur :)
|
||||
|
||||
if ((((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) || (mapmet > 0 && mapcontlutili)) && it == 1 && scale > 0) {
|
||||
// out will be modified => store it for use in next iteration.
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
@ -437,60 +410,48 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) {
|
||||
shmap->updateL (out, shradius, true, 1);
|
||||
int h_th = 0;
|
||||
int s_th = 0;
|
||||
if (((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) {
|
||||
shmap->updateL(out, shradius, true, 1);
|
||||
h_th = shmap->max_f - deh.htonalwidth * (shmap->max_f - shmap->avg) / 100;
|
||||
s_th = deh.stonalwidth * (shmap->avg - shmap->min_f) / 100;
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
vfloat pondv = F2V(pond);
|
||||
vfloat limMinv = F2V(ilimdx);
|
||||
vfloat limMaxv = F2V(limdx);
|
||||
|
||||
#endif
|
||||
|
||||
if(mapmet > 0 && mapcontlutili && it == 1) {
|
||||
// TODO: When rgbcurvespeedup branch is merged into master we can simplify the code by
|
||||
// 1) in rawimagesource.retinexPrepareCurves() insert
|
||||
// mapcurve *= 0.5f;
|
||||
// after
|
||||
// CurveFactory::mapcurve (mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI);
|
||||
// 2) remove the division by 2.f from the code 7 lines below this line
|
||||
if (mapmet > 0 && mapcontlutili && it == 1) {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
for (int j = 0; j < W_L; j++) {
|
||||
out[i][j] = mapcurve[2.f * out[i][j]] / 2.f;
|
||||
out[i][j] = mapcurve[2.f * out[i][j]];
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) {
|
||||
float hWeight = (100.f - shHighlights) / 100.f;
|
||||
float sWeight = (100.f - shShadows) / 100.f;
|
||||
if (((mapmet == 2 && scale > 2) || mapmet == 3 || mapmet == 4) && it == 1) {
|
||||
const float hWeight = (100.f - shHighlights) / 100.f;
|
||||
const float sWeight = (100.f - shShadows) / 100.f;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(dynamic,16)
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++) {
|
||||
for (int j = 0; j < W_L; j++) {
|
||||
float mapval = 1.f + shmap->map[i][j];
|
||||
float factor = 1.f;
|
||||
const float mapval = 1.f + shmap->map[i][j];
|
||||
float factor;
|
||||
|
||||
if (mapval > h_th) {
|
||||
factor = (h_th + hWeight * (mapval - h_th)) / mapval;
|
||||
} else if (mapval < s_th) {
|
||||
factor = (s_th - sWeight * (s_th - mapval)) / mapval;
|
||||
} else {
|
||||
factor = 1.f;
|
||||
}
|
||||
|
||||
out[i][j] *= factor;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -503,14 +464,16 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
int j = 0;
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
if(useHslLin) {
|
||||
const vfloat pondv = F2V(pond);
|
||||
const vfloat limMinv = F2V(ilimdx);
|
||||
const vfloat limMaxv = F2V(limdx);
|
||||
if( useHslLin) {
|
||||
for (; j < W_L - 3; j += 4) {
|
||||
_mm_storeu_ps(&luminance[i][j], LVFU(luminance[i][j]) + pondv * (vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv) ));
|
||||
STVFU(luminance[i][j], LVFU(luminance[i][j]) + pondv * vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv));
|
||||
}
|
||||
} else {
|
||||
for (; j < W_L - 3; j += 4) {
|
||||
_mm_storeu_ps(&luminance[i][j], LVFU(luminance[i][j]) + pondv * xlogf(vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv) ));
|
||||
STVFU(luminance[i][j], LVFU(luminance[i][j]) + pondv * xlogf(vclampf(LVFU(src[i][j]) / LVFU(out[i][j]), limMinv, limMaxv)));
|
||||
}
|
||||
}
|
||||
|
||||
@ -518,7 +481,7 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
if(useHslLin) {
|
||||
for (; j < W_L; j++) {
|
||||
luminance[i][j] += pond * (LIM(src[i][j] / out[i][j], ilimdx, limdx));
|
||||
luminance[i][j] += pond * LIM(src[i][j] / out[i][j], ilimdx, limdx);
|
||||
}
|
||||
} else {
|
||||
for (; j < W_L; j++) {
|
||||
@ -528,22 +491,14 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
}
|
||||
}
|
||||
|
||||
if(mapmet > 1) {
|
||||
if(shmap) {
|
||||
delete shmap;
|
||||
}
|
||||
}
|
||||
|
||||
shmap = nullptr;
|
||||
|
||||
delete [] buffer;
|
||||
delete [] srcBuffer;
|
||||
srcBuffer.reset();
|
||||
|
||||
float mean = 0.f;
|
||||
float stddv = 0.f;
|
||||
// I call mean_stddv2 instead of mean_stddv ==> logBetaGain
|
||||
|
||||
mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
float maxtr, mintr;
|
||||
mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
//printf("mean=%f std=%f delta=%f maxtr=%f mintr=%f\n", mean, stddv, delta, maxtr, mintr);
|
||||
|
||||
//mean_stddv( luminance, mean, stddv, W_L, H_L, logBetaGain, maxtr, mintr);
|
||||
@ -563,16 +518,12 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
bmin *= 500.f;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
float absciss;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for schedule(dynamic,16)
|
||||
#pragma omp parallel for schedule(dynamic,16)
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < H_L; i++ )
|
||||
for (int i = 0; i < H_L; i++ ) {
|
||||
for (int j = 0; j < W_L; j++) { //for mintr to maxtr evalate absciss in function of original transmission
|
||||
float absciss;
|
||||
if (LIKELY(fabsf(luminance[i][j] - mean) < stddv)) {
|
||||
absciss = asig * luminance[i][j] + bsig;
|
||||
} else if (luminance[i][j] >= mean) {
|
||||
@ -592,22 +543,15 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
// median filter on transmission ==> reduce artifacts
|
||||
if (deh.medianmap && it == 1) { //only one time
|
||||
int wid = W_L;
|
||||
int hei = H_L;
|
||||
float *tmL[hei] ALIGNED16;
|
||||
float *tmLBuffer = new float[wid * hei];
|
||||
int borderL = 1;
|
||||
|
||||
for (int i = 0; i < hei; i++) {
|
||||
tmL[i] = &tmLBuffer[i * wid];
|
||||
}
|
||||
JaggedArray<float> tmL(W_L, H_L);
|
||||
constexpr int borderL = 1;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = borderL; i < hei - borderL; i++) {
|
||||
for (int j = borderL; j < wid - borderL; j++) {
|
||||
for (int i = borderL; i < H_L - borderL; i++) {
|
||||
for (int j = borderL; j < W_L - borderL; j++) {
|
||||
tmL[i][j] = median(luminance[i][j], luminance[i - 1][j], luminance[i + 1][j], luminance[i][j + 1], luminance[i][j - 1], luminance[i - 1][j - 1], luminance[i - 1][j + 1], luminance[i + 1][j - 1], luminance[i + 1][j + 1]); //3x3
|
||||
}
|
||||
}
|
||||
@ -616,23 +560,19 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = borderL; i < hei - borderL; i++ ) {
|
||||
for (int j = borderL; j < wid - borderL; j++) {
|
||||
for (int i = borderL; i < H_L - borderL; i++ ) {
|
||||
for (int j = borderL; j < W_L - borderL; j++) {
|
||||
luminance[i][j] = tmL[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
delete [] tmLBuffer;
|
||||
|
||||
}
|
||||
|
||||
// I call mean_stddv2 instead of mean_stddv ==> logBetaGain
|
||||
//mean_stddv( luminance, mean, stddv, W_L, H_L, 1.f, maxtr, mintr);
|
||||
mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
|
||||
mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
}
|
||||
|
||||
float epsil = 0.1f;
|
||||
constexpr float epsil = 0.1f;
|
||||
|
||||
mini = mean - varx * stddv;
|
||||
|
||||
@ -653,14 +593,11 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
delta = 1.0f;
|
||||
}
|
||||
|
||||
float cdfactor = 32768.f / delta;
|
||||
maxCD = -9999999.f;
|
||||
minCD = 9999999.f;
|
||||
// coeff for auto "transmission" with 2 sigma #95% data
|
||||
float aza = 16300.f / (2.f * stddv);
|
||||
float azb = -aza * (mean - 2.f * stddv);
|
||||
float bza = 16300.f / (2.f * stddv);
|
||||
float bzb = 16300.f - bza * (mean);
|
||||
const float aza = 16300.f / (2.f * stddv);
|
||||
const float azb = -aza * (mean - 2.f * stddv);
|
||||
const float bza = 16300.f / (2.f * stddv);
|
||||
const float bzb = 16300.f - bza * (mean);
|
||||
|
||||
//prepare work for curve gain
|
||||
#ifdef _OPENMP
|
||||
@ -677,7 +614,7 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
stddv = 0.f;
|
||||
// I call mean_stddv2 instead of mean_stddv ==> logBetaGain
|
||||
|
||||
mean_stddv2( luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
mean_stddv2(luminance, mean, stddv, W_L, H_L, maxtr, mintr);
|
||||
float asig = 0.f, bsig = 0.f, amax = 0.f, bmax = 0.f, amin = 0.f, bmin = 0.f;
|
||||
|
||||
if (dehagaintransmissionCurve && mean != 0.f && stddv != 0.f) { //if curve
|
||||
@ -696,16 +633,15 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
bmin *= 500.f;
|
||||
}
|
||||
|
||||
const float cdfactor = 32768.f / delta;
|
||||
maxCD = -9999999.f;
|
||||
minCD = 9999999.f;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
float cdmax = -999999.f, cdmin = 999999.f;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for schedule(dynamic,16) nowait
|
||||
#pragma omp parallel for reduction(max:maxCD) reduction(min:minCD) schedule(dynamic, 16)
|
||||
#endif
|
||||
|
||||
for ( int i = 0; i < H_L; i ++ )
|
||||
for ( int i = 0; i < H_L; i ++ ) {
|
||||
for (int j = 0; j < W_L; j++) {
|
||||
float gan;
|
||||
|
||||
@ -723,68 +659,56 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
|
||||
// float cd = cdfactor * ( luminance[i][j] - mini ) + offse;
|
||||
// TODO : move multiplication by 2.f inside the curve
|
||||
gan = 2.f * (dehagaintransmissionCurve[absciss]); //new gain function transmission
|
||||
gan = 2.f * dehagaintransmissionCurve[absciss]; //new gain function transmission
|
||||
} else {
|
||||
gan = 0.5f;
|
||||
}
|
||||
|
||||
float cd = gan * cdfactor * ( luminance[i][j] ) + offse;
|
||||
const float cd = gan * cdfactor * luminance[i][j] + offse;
|
||||
|
||||
cdmax = cd > cdmax ? cd : cdmax;
|
||||
cdmin = cd < cdmin ? cd : cdmin;
|
||||
maxCD = cd > maxCD ? cd : maxCD;
|
||||
minCD = cd < minCD ? cd : minCD;
|
||||
|
||||
float str = strengthx;
|
||||
|
||||
if(lhutili && it == 1) { // S=f(H)
|
||||
if (lhutili && it == 1) { // S=f(H)
|
||||
{
|
||||
float HH = exLuminance[i][j];
|
||||
const float HH = exLuminance[i][j];
|
||||
float valparam;
|
||||
|
||||
if(useHsl || useHslLin) {
|
||||
valparam = float((shcurve->getVal(HH) - 0.5f));
|
||||
valparam = shcurve->getVal(HH) - 0.5f;
|
||||
} else {
|
||||
valparam = float((shcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f));
|
||||
valparam = shcurve->getVal(Color::huelab_to_huehsv2(HH)) - 0.5f;
|
||||
}
|
||||
|
||||
str *= (1.f + 2.f * valparam);
|
||||
}
|
||||
}
|
||||
|
||||
if(higplus && exLuminance[i][j] > 65535.f * hig) {
|
||||
if (higplus && exLuminance[i][j] > 65535.f * hig) {
|
||||
str *= hig;
|
||||
}
|
||||
|
||||
if(viewmet == 0) {
|
||||
luminance[i][j] = intp(str, clipretinex( cd, 0.f, 32768.f ), originalLuminance[i][j]);
|
||||
} else if(viewmet == 1) {
|
||||
if (viewmet == 0) {
|
||||
luminance[i][j] = intp(str, LIM(cd, 0.f, 32768.f), originalLuminance[i][j]);
|
||||
} else if (viewmet == 1) {
|
||||
luminance[i][j] = out[i][j];
|
||||
} else if(viewmet == 4) {
|
||||
} else if (viewmet == 4) {
|
||||
luminance[i][j] = originalLuminance[i][j] + str * (originalLuminance[i][j] - out[i][j]);//unsharp
|
||||
} else if(viewmet == 2) {
|
||||
} else if (viewmet == 2) {
|
||||
if(tran[i][j] <= mean) {
|
||||
luminance[i][j] = azb + aza * tran[i][j]; //auto values
|
||||
} else {
|
||||
luminance[i][j] = bzb + bza * tran[i][j];
|
||||
}
|
||||
} else { /*if(viewmet == 3) */
|
||||
} else { /*if (viewmet == 3) */
|
||||
luminance[i][j] = 1000.f + tran[i][j] * 700.f; //arbitrary values to help display log values which are between -20 to + 30 - usage values -4 + 5
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
maxCD = maxCD > cdmax ? maxCD : cdmax;
|
||||
minCD = minCD < cdmin ? minCD : cdmin;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
delete [] outBuffer;
|
||||
outBuffer = nullptr;
|
||||
//printf("cdmin=%f cdmax=%f\n",minCD, maxCD);
|
||||
Tmean = mean;
|
||||
Tsigma = stddv;
|
||||
Tmin = mintr;
|
||||
@ -795,12 +719,6 @@ void RawImageSource::MSR(float** luminance, float** originalLuminance, float **e
|
||||
shcurve = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if(tranBuffer) {
|
||||
delete [] tranBuffer;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -207,13 +207,13 @@ BENCHFUN
|
||||
for (int k = 0; k < sharpenParam.deconviter; k++) {
|
||||
if (!needdamp) {
|
||||
// apply gaussian blur and divide luminance by result of gaussian blur
|
||||
gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance);
|
||||
gaussianBlur(tmpI, tmp, W, H, sigma, false, GAUSS_DIV, luminance);
|
||||
} else {
|
||||
// apply gaussian blur + damping
|
||||
gaussianBlur(tmpI, tmp, W, H, sigma);
|
||||
dcdamping(tmp, luminance, damping, W, H);
|
||||
}
|
||||
gaussianBlur(tmp, tmpI, W, H, sigma, nullptr, GAUSS_MULT);
|
||||
gaussianBlur(tmp, tmpI, W, H, sigma, false, GAUSS_MULT);
|
||||
} // end for
|
||||
|
||||
#ifdef _OPENMP
|
||||
|
@ -32,7 +32,6 @@
|
||||
#include "improcfun.h"
|
||||
#include "LUT.h"
|
||||
#include "array2D.h"
|
||||
#include "boxblur.h"
|
||||
#include "rt_math.h"
|
||||
#include "mytime.h"
|
||||
#include "sleef.c"
|
||||
|
@ -1950,8 +1950,8 @@ void RawImageSource::retinexPrepareCurves(const RetinexParams &retinexParams, LU
|
||||
CurveFactory::curveDehaContL (retinexcontlutili, retinexParams.cdcurve, cdcurve, 1, lhist16RETI, histLRETI);
|
||||
}
|
||||
|
||||
CurveFactory::mapcurve (mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI);
|
||||
|
||||
CurveFactory::mapcurve(mapcontlutili, retinexParams.mapcurve, mapcurve, 1, lhist16RETI, histLRETI);
|
||||
mapcurve *= 0.5f;
|
||||
retinexParams.getCurves(retinextransmissionCurve, retinexgaintransmissionCurve);
|
||||
}
|
||||
|
||||
|
@ -201,7 +201,7 @@ public:
|
||||
}
|
||||
static void inverse33 (const double (*coeff)[3], double (*icoeff)[3]);
|
||||
|
||||
void MSR(float** luminance, float **originalLuminance, float **exLuminance, LUTf & mapcurve, bool &mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax);
|
||||
void MSR(float** luminance, float **originalLuminance, float **exLuminance, const LUTf& mapcurve, bool mapcontlutili, int width, int height, const RetinexParams &deh, const RetinextransmissionCurve & dehatransmissionCurve, const RetinexgaintransmissionCurve & dehagaintransmissionCurve, float &minCD, float &maxCD, float &mini, float &maxi, float &Tmean, float &Tsigma, float &Tmin, float &Tmax);
|
||||
void HLRecovery_inpaint (float** red, float** green, float** blue) override;
|
||||
static void HLRecovery_Luminance (float* rin, float* gin, float* bin, float* rout, float* gout, float* bout, int width, float maxval);
|
||||
static void HLRecovery_CIELab (float* rin, float* gin, float* bin, float* rout, float* gout, float* bout, int width, float maxval, double cam[3][3], double icam[3][3]);
|
||||
|
@ -21,6 +21,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "array2D.h"
|
||||
#include "rt_math.h"
|
||||
|
||||
namespace rtengine {
|
||||
|
||||
|
@ -85,21 +85,14 @@ void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int
|
||||
if (!hq) {
|
||||
fillLuminance( img, map, lumi);
|
||||
|
||||
float *buffer = nullptr;
|
||||
|
||||
if(radius > 40.) {
|
||||
// When we pass another buffer to gaussianBlur, it will use iterated boxblur which is less prone to artifacts
|
||||
buffer = new float[W * H];
|
||||
}
|
||||
const bool useBoxBlur = radius > 40.0; // boxblur is less prone to artifacts for large radi
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#pragma omp parallel if (!useBoxBlur)
|
||||
#endif
|
||||
{
|
||||
gaussianBlur (map, map, W, H, radius, buffer);
|
||||
gaussianBlur (map, map, W, H, radius, useBoxBlur);
|
||||
}
|
||||
|
||||
delete [] buffer;
|
||||
}
|
||||
|
||||
else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user