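Removed most of the __x86_64__ dependencies: SSE2 code paths are now guarded by __SSE2__ alone, and the SSEFUNCTION qualifier is dropped from function definitions. Also removed _RT_NESTED_OPENMP; the guards that used it now test _OPENMP instead.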
This commit is contained in:
parent
4046a3c9b9
commit
96bf06a950
@ -291,7 +291,7 @@ bool MultiDiagonalSymmetricMatrix::LazySetEntry(float value, int row, int column
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* RESTRICT x)
|
void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* RESTRICT x)
|
||||||
{
|
{
|
||||||
|
|
||||||
int srm = StartRows[m - 1];
|
int srm = StartRows[m - 1];
|
||||||
@ -693,7 +693,7 @@ EdgePreservingDecomposition::~EdgePreservingDecomposition()
|
|||||||
delete A;
|
delete A;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop)
|
float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur, bool UseBlurForEdgeStop)
|
||||||
{
|
{
|
||||||
|
|
||||||
if(Blur == nullptr)
|
if(Blur == nullptr)
|
||||||
@ -884,7 +884,7 @@ float *EdgePreservingDecomposition::CreateIteratedBlur(float *Source, float Scal
|
|||||||
return Blur;
|
return Blur;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings)
|
void EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, int Iterates, int Reweightings)
|
||||||
{
|
{
|
||||||
if(w < 300 && h < 300) { // set number of Reweightings to zero for small images (thumbnails). We could try to find a better solution here.
|
if(w < 300 && h < 300) { // set number of Reweightings to zero for small images (thumbnails). We could try to find a better solution here.
|
||||||
Reweightings = 0;
|
Reweightings = 0;
|
||||||
|
@ -474,7 +474,7 @@ void ImProcFunctions::Tile_calc(int tilesize, int overlap, int kall, int imwidth
|
|||||||
int denoiseNestedLevels = 1;
|
int denoiseNestedLevels = 1;
|
||||||
enum nrquality {QUALITY_STANDARD, QUALITY_HIGH};
|
enum nrquality {QUALITY_STANDARD, QUALITY_HIGH};
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi)
|
void ImProcFunctions::RGB_denoise(int kall, Imagefloat * src, Imagefloat * dst, Imagefloat * calclum, float * ch_M, float *max_r, float *max_b, bool isRAW, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, const NoiseCurve & noiseLCurve, const NoiseCurve & noiseCCurve, float &nresi, float &highresi)
|
||||||
{
|
{
|
||||||
BENCHFUN
|
BENCHFUN
|
||||||
//#ifdef _DEBUG
|
//#ifdef _DEBUG
|
||||||
@ -783,7 +783,7 @@ BENCHFUN
|
|||||||
numthreads = MIN(numthreads, options.rgbDenoiseThreadLimit);
|
numthreads = MIN(numthreads, options.rgbDenoiseThreadLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
denoiseNestedLevels = omp_get_max_threads() / numthreads;
|
denoiseNestedLevels = omp_get_max_threads() / numthreads;
|
||||||
bool oldNested = omp_get_nested();
|
bool oldNested = omp_get_nested();
|
||||||
|
|
||||||
@ -916,7 +916,7 @@ BENCHFUN
|
|||||||
|
|
||||||
if (!denoiseMethodRgb) { //lab mode
|
if (!denoiseMethodRgb) { //lab mode
|
||||||
//modification Jacques feb 2013 and july 2014
|
//modification Jacques feb 2013 and july 2014
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -964,7 +964,7 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {//RGB mode
|
} else {//RGB mode
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -999,7 +999,7 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {//image is not raw; use Lab parametrization
|
} else {//image is not raw; use Lab parametrization
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1159,7 +1159,7 @@ BENCHFUN
|
|||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
// precalculate madL, because it's used in adecomp and bdecomp
|
// precalculate madL, because it's used in adecomp and bdecomp
|
||||||
int maxlvl = Ldecomp->maxlevel();
|
int maxlvl = Ldecomp->maxlevel();
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1284,7 +1284,7 @@ BENCHFUN
|
|||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
// copy labdn->L to Lin before it gets modified by reconstruction
|
// copy labdn->L to Lin before it gets modified by reconstruction
|
||||||
Lin = new array2D<float>(width, height);
|
Lin = new array2D<float>(width, height);
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1337,14 +1337,14 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
int masterThread = omp_get_thread_num();
|
int masterThread = omp_get_thread_num();
|
||||||
#endif
|
#endif
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num();
|
int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num();
|
||||||
#else
|
#else
|
||||||
int subThread = 0;
|
int subThread = 0;
|
||||||
@ -1354,7 +1354,7 @@ BENCHFUN
|
|||||||
float *fLblox = fLbloxArray[subThread];
|
float *fLblox = fLbloxArray[subThread];
|
||||||
float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16;
|
float pBuf[width + TS + 2 * blkrad * offset] ALIGNED16;
|
||||||
float nbrwt[TS * TS] ALIGNED64;
|
float nbrwt[TS * TS] ALIGNED64;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1454,7 +1454,7 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel for num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1591,7 +1591,7 @@ BENCHFUN
|
|||||||
realred /= 100.f;
|
realred /= 100.f;
|
||||||
realblue /= 100.f;
|
realblue /= 100.f;
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels)
|
#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1641,7 +1641,7 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {//RGB mode
|
} else {//RGB mode
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1681,7 +1681,7 @@ BENCHFUN
|
|||||||
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1748,7 +1748,7 @@ BENCHFUN
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
omp_set_nested(oldNested);
|
omp_set_nested(oldNested);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2043,7 +2043,7 @@ BENCHFUN
|
|||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
|
void ImProcFunctions::RGBtile_denoise(float * fLblox, int hblproc, float noisevar_Ldetail, float * nbrwt, float * blurbuffer) //for DCT
|
||||||
{
|
{
|
||||||
int blkstart = hblproc * TS * TS;
|
int blkstart = hblproc * TS * TS;
|
||||||
|
|
||||||
@ -2254,7 +2254,7 @@ void ImProcFunctions::Noise_residualAB(wavelet_decomposition &WaveletCoeffs_ab,
|
|||||||
chmaxresid = maxresid;
|
chmaxresid = maxresid;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
|
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposition &WaveletCoeffs_L, float *noisevarlum, float madL[8][3])
|
||||||
{
|
{
|
||||||
int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5);
|
int maxlvl = min(WaveletCoeffs_L.maxlevel(), 5);
|
||||||
const float eps = 0.01f;
|
const float eps = 0.01f;
|
||||||
@ -2272,7 +2272,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool memoryAllocationFailed = false;
|
bool memoryAllocationFailed = false;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -2287,7 +2287,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit
|
|||||||
|
|
||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2387,7 +2387,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit
|
|||||||
return (!memoryAllocationFailed);
|
return (!memoryAllocationFailed);
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
|
bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab,
|
||||||
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)
|
float *noisevarchrom, float madL[8][3], float noisevar_ab, const bool useNoiseCCurve, bool autoch, bool denoiseMethodRgb)
|
||||||
{
|
{
|
||||||
int maxlvl = WaveletCoeffs_L.maxlevel();
|
int maxlvl = WaveletCoeffs_L.maxlevel();
|
||||||
@ -2411,7 +2411,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool memoryAllocationFailed = false;
|
bool memoryAllocationFailed = false;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -2427,7 +2427,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
|||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
|
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2446,7 +2446,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2550,7 +2550,7 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool memoryAllocationFailed = false;
|
bool memoryAllocationFailed = false;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -2565,7 +2565,7 @@ bool ImProcFunctions::WaveletDenoiseAllL(wavelet_decomposition &WaveletCoeffs_L,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2605,7 +2605,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool memoryAllocationFailed = false;
|
bool memoryAllocationFailed = false;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
#pragma omp parallel num_threads(denoiseNestedLevels) if (denoiseNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -2619,7 +2619,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!memoryAllocationFailed) {
|
if (!memoryAllocationFailed) {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2643,7 +2643,7 @@ bool ImProcFunctions::WaveletDenoiseAllAB(wavelet_decomposition &WaveletCoeffs_L
|
|||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
|
void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeffs_L, float **buffer, int level, int dir,
|
||||||
float *noisevarlum, float * madL, float * vari, int edge)
|
float *noisevarlum, float * madL, float * vari, int edge)
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -2733,7 +2733,7 @@ SSEFUNCTION void ImProcFunctions::ShrinkAllL(wavelet_decomposition &WaveletCoeff
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoeffs_L, wavelet_decomposition &WaveletCoeffs_ab, float **buffer, int level, int dir,
|
||||||
float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch,
|
float *noisevarchrom, float noisevar_ab, const bool useNoiseCCurve, bool autoch,
|
||||||
bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated)
|
bool denoiseMethodRgb, float * madL, float * madaab, bool madCalculated)
|
||||||
|
|
||||||
@ -2848,7 +2848,7 @@ SSEFUNCTION void ImProcFunctions::ShrinkAllAB(wavelet_decomposition &WaveletCoef
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
|
void ImProcFunctions::ShrinkAll_info(float ** WavCoeffs_a, float ** WavCoeffs_b,
|
||||||
int W_ab, int H_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut,
|
int W_ab, int H_ab, float **noisevarlum, float **noisevarchrom, float **noisevarhue, float &chaut, int &Nb, float &redaut, float &blueaut,
|
||||||
float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc,
|
float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, int schoice, int lvl, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc,
|
||||||
float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb)
|
float &maxchred, float &maxchblue, float &minchred, float &minchblue, int &nb, float &chau, float &chred, float &chblue, bool denoiseMethodRgb)
|
||||||
@ -2988,7 +2988,7 @@ void ImProcFunctions::WaveletDenoiseAll_info(int levwav, wavelet_decomposition &
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope)
|
void ImProcFunctions::RGB_denoise_infoGamCurve(const procparams::DirPyrDenoiseParams & dnparams, bool isRAW, LUTf &gamcurve, float &gam, float &gamthresh, float &gamslope)
|
||||||
{
|
{
|
||||||
gam = dnparams.gamma;
|
gam = dnparams.gamma;
|
||||||
gamthresh = 0.001f;
|
gamthresh = 0.001f;
|
||||||
@ -3153,7 +3153,7 @@ void ImProcFunctions::calcautodn_info(float &chaut, float &delta, int Nb, int le
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
|
void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat * provicalc, const bool isRAW, LUTf &gamcurve, float gam, float gamthresh, float gamslope, const procparams::DirPyrDenoiseParams & dnparams, const double expcomp, float &chaut, int &Nb, float &redaut, float &blueaut, float &maxredaut, float &maxblueaut, float &minredaut, float &minblueaut, float &chromina, float &sigma, float &lumema, float &sigma_L, float &redyel, float &skinc, float &nsknc, bool multiThread)
|
||||||
{
|
{
|
||||||
if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) {
|
if ((settings->leveldnautsimpl == 1 && dnparams.Cmethod == "MAN") || (settings->leveldnautsimpl == 0 && dnparams.C2method == "MANU")) {
|
||||||
//nothing to do
|
//nothing to do
|
||||||
@ -3192,7 +3192,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
|||||||
bcalc[i] = new float[wid];
|
bcalc[i] = new float[wid];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for if (multiThread)
|
#pragma omp parallel for if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -3315,7 +3315,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
|||||||
//fill tile from image; convert RGB to "luma/chroma"
|
//fill tile from image; convert RGB to "luma/chroma"
|
||||||
|
|
||||||
if (isRAW) {//image is raw; use channel differences for chroma channels
|
if (isRAW) {//image is raw; use channel differences for chroma channels
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for if (multiThread)
|
#pragma omp parallel for if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -3368,7 +3368,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for if (multiThread)
|
#pragma omp parallel for if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -3386,7 +3386,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
|||||||
|
|
||||||
if (!denoiseMethodRgb) { //lab mode, modification Jacques feb 2013 and july 2014
|
if (!denoiseMethodRgb) { //lab mode, modification Jacques feb 2013 and july 2014
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for if (multiThread)
|
#pragma omp parallel for if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -3523,17 +3523,17 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int levwav = 5;
|
const int levwav = 5;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel sections if (multiThread)
|
#pragma omp parallel sections if (multiThread)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp section
|
#pragma omp section
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
adecomp = new wavelet_decomposition(labdn->data + datalen, labdn->W, labdn->H, levwav, 1);
|
adecomp = new wavelet_decomposition(labdn->data + datalen, labdn->W, labdn->H, levwav, 1);
|
||||||
}
|
}
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp section
|
#pragma omp section
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
@ -104,7 +104,7 @@ protected:
|
|||||||
unsigned int upperBound; // always equals size-1, parameter created for performance reason
|
unsigned int upperBound; // always equals size-1, parameter created for performance reason
|
||||||
private:
|
private:
|
||||||
unsigned int owner;
|
unsigned int owner;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat maxsv ALIGNED16;
|
vfloat maxsv ALIGNED16;
|
||||||
vfloat sizev ALIGNED16;
|
vfloat sizev ALIGNED16;
|
||||||
vint sizeiv ALIGNED16;
|
vint sizeiv ALIGNED16;
|
||||||
@ -136,7 +136,7 @@ public:
|
|||||||
maxs = size - 2;
|
maxs = size - 2;
|
||||||
maxsf = (float)maxs;
|
maxsf = (float)maxs;
|
||||||
maxIndexFloat = ((float)upperBound) - 1e-5;
|
maxIndexFloat = ((float)upperBound) - 1e-5;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
maxsv = F2V( maxs );
|
maxsv = F2V( maxs );
|
||||||
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
||||||
sizev = F2V( size - 1 );
|
sizev = F2V( size - 1 );
|
||||||
@ -167,7 +167,7 @@ public:
|
|||||||
maxs = size - 2;
|
maxs = size - 2;
|
||||||
maxsf = (float)maxs;
|
maxsf = (float)maxs;
|
||||||
maxIndexFloat = ((float)upperBound) - 1e-5;
|
maxIndexFloat = ((float)upperBound) - 1e-5;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
maxsv = F2V( maxs );
|
maxsv = F2V( maxs );
|
||||||
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
||||||
sizev = F2V( size - 1 );
|
sizev = F2V( size - 1 );
|
||||||
@ -178,7 +178,7 @@ public:
|
|||||||
{
|
{
|
||||||
data = nullptr;
|
data = nullptr;
|
||||||
reset();
|
reset();
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
maxsv = ZEROV;
|
maxsv = ZEROV;
|
||||||
sizev = ZEROV;
|
sizev = ZEROV;
|
||||||
sizeiv = _mm_setzero_si128();
|
sizeiv = _mm_setzero_si128();
|
||||||
@ -243,7 +243,7 @@ public:
|
|||||||
this->maxs = this->size - 2;
|
this->maxs = this->size - 2;
|
||||||
this->maxsf = (float)this->maxs;
|
this->maxsf = (float)this->maxs;
|
||||||
this->maxIndexFloat = ((float)this->upperBound) - 1e-5;
|
this->maxIndexFloat = ((float)this->upperBound) - 1e-5;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
this->maxsv = F2V( this->size - 2);
|
this->maxsv = F2V( this->size - 2);
|
||||||
this->sizeiv = _mm_set1_epi32( (int)(this->size - 1) );
|
this->sizeiv = _mm_set1_epi32( (int)(this->size - 1) );
|
||||||
this->sizev = F2V( this->size - 1 );
|
this->sizev = F2V( this->size - 1 );
|
||||||
@ -258,7 +258,7 @@ public:
|
|||||||
LUT<T> & operator+=(LUT<T> &rhs)
|
LUT<T> & operator+=(LUT<T> &rhs)
|
||||||
{
|
{
|
||||||
if (rhs.size == this->size) {
|
if (rhs.size == this->size) {
|
||||||
#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324
|
#ifdef _OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -274,7 +274,7 @@ public:
|
|||||||
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
||||||
LUT<float> & operator*=(float factor)
|
LUT<float> & operator*=(float factor)
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324
|
#ifdef _OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -289,7 +289,7 @@ public:
|
|||||||
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
template<typename U = T, typename = typename std::enable_if<std::is_same<U, float>::value>::type>
|
||||||
LUT<float> & operator/=(float divisor)
|
LUT<float> & operator/=(float divisor)
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP // temporary solution to fix Issue #3324
|
#ifdef _OPENMP
|
||||||
#pragma omp simd
|
#pragma omp simd
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -307,7 +307,7 @@ public:
|
|||||||
return data[ rtengine::LIM<int>(index, 0, upperBound) ];
|
return data[ rtengine::LIM<int>(index, 0, upperBound) ];
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
|
|
||||||
|
|
||||||
// NOTE: This function requires LUTs which clips only at lower bound
|
// NOTE: This function requires LUTs which clips only at lower bound
|
||||||
@ -698,7 +698,7 @@ public:
|
|||||||
maxs = size - 2;
|
maxs = size - 2;
|
||||||
maxsf = (float)maxs;
|
maxsf = (float)maxs;
|
||||||
maxIndexFloat = ((float)upperBound) - 1e-5;
|
maxIndexFloat = ((float)upperBound) - 1e-5;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
maxsv = F2V( size - 2);
|
maxsv = F2V( size - 2);
|
||||||
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
sizeiv = _mm_set1_epi32( (int)(size - 1) );
|
||||||
sizev = F2V( size - 1 );
|
sizev = F2V( size - 1 );
|
||||||
|
@ -43,7 +43,7 @@ namespace rtengine
|
|||||||
{
|
{
|
||||||
extern const Settings* settings;
|
extern const Settings* settings;
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh)
|
void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radius, int thresh)
|
||||||
{
|
{
|
||||||
const int halfwin = ceil(2 * radius) + 1;
|
const int halfwin = ceil(2 * radius) + 1;
|
||||||
|
|
||||||
@ -285,7 +285,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst,
|
|||||||
free(fringe);
|
free(fringe);
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh)
|
void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * dst, double radius, int thresh)
|
||||||
{
|
{
|
||||||
const int halfwin = ceil(2 * radius) + 1;
|
const int halfwin = ceil(2 * radius) + 1;
|
||||||
|
|
||||||
@ -655,7 +655,7 @@ SSEFUNCTION void ImProcFunctions::PF_correct_RTcam(CieImage * src, CieImage * ds
|
|||||||
free(fringe);
|
free(fringe);
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad)
|
void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad)
|
||||||
{
|
{
|
||||||
const int halfwin = ceil(2 * radius) + 1;
|
const int halfwin = ceil(2 * radius) + 1;
|
||||||
MyTime t1, t2;
|
MyTime t1, t2;
|
||||||
@ -1263,7 +1263,7 @@ SSEFUNCTION void ImProcFunctions::Badpixelscam(CieImage * src, CieImage * dst, d
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom)
|
void ImProcFunctions::BadpixelsLab(LabImage * src, LabImage * dst, double radius, int thresh, int mode, float skinprot, float chrom)
|
||||||
{
|
{
|
||||||
const int halfwin = ceil(2 * radius) + 1;
|
const int halfwin = ceil(2 * radius) + 1;
|
||||||
MyTime t1, t2;
|
MyTime t1, t2;
|
||||||
|
@ -38,7 +38,7 @@
|
|||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
|
|
||||||
SSEFUNCTION void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, array2D<float> &rawData, array2D<float> &red, array2D<float> &green, array2D<float> &blue)
|
void RawImageSource::amaze_demosaic_RT(int winx, int winy, int winw, int winh, array2D<float> &rawData, array2D<float> &red, array2D<float> &green, array2D<float> &blue)
|
||||||
{
|
{
|
||||||
BENCHFUN
|
BENCHFUN
|
||||||
|
|
||||||
|
@ -121,7 +121,7 @@ template<class T, class A> void boxblur (T** src, A** dst, int radx, int rady, i
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T, class A> SSEFUNCTION void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H)
|
template<class T, class A> void boxblur (T** src, A** dst, T* buffer, int radx, int rady, int W, int H)
|
||||||
{
|
{
|
||||||
//box blur image; box range = (radx,rady)
|
//box blur image; box range = (radx,rady)
|
||||||
|
|
||||||
@ -309,7 +309,7 @@ template<class T, class A> SSEFUNCTION void boxblur (T** src, A** dst, T* buffer
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T, class A> SSEFUNCTION void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H)
|
template<class T, class A> void boxblur (T* src, A* dst, A* buffer, int radx, int rady, int W, int H)
|
||||||
{
|
{
|
||||||
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
|
//box blur image; box range = (radx,rady) i.e. box size is (2*radx+1)x(2*rady+1)
|
||||||
|
|
||||||
@ -496,7 +496,7 @@ template<class T, class A> SSEFUNCTION void boxblur (T* src, A* dst, A* buffer,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T, class A> SSEFUNCTION void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp)
|
template<class T, class A> void boxabsblur (T* src, A* dst, int radx, int rady, int W, int H, float * temp)
|
||||||
{
|
{
|
||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
@ -1725,7 +1725,7 @@ void Color::Lab2XYZ(vfloat L, vfloat a, vfloat b, vfloat &x, vfloat &y, vfloat &
|
|||||||
void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width)
|
void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b, const float wp[3][3], int width)
|
||||||
{
|
{
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat maxvalfv = F2V(MAXVALF);
|
vfloat maxvalfv = F2V(MAXVALF);
|
||||||
vfloat c116v = F2V(116.f);
|
vfloat c116v = F2V(116.f);
|
||||||
vfloat c5242d88v = F2V(5242.88f);
|
vfloat c5242d88v = F2V(5242.88f);
|
||||||
@ -1733,7 +1733,7 @@ void Color::RGB2Lab(float *R, float *G, float *B, float *L, float *a, float *b,
|
|||||||
vfloat c200v = F2V(200.f);
|
vfloat c200v = F2V(200.f);
|
||||||
#endif
|
#endif
|
||||||
int i = 0;
|
int i = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for(;i < width - 3; i+=4) {
|
for(;i < width - 3; i+=4) {
|
||||||
const vfloat rv = LVFU(R[i]);
|
const vfloat rv = LVFU(R[i]);
|
||||||
const vfloat gv = LVFU(G[i]);
|
const vfloat gv = LVFU(G[i]);
|
||||||
@ -2606,7 +2606,7 @@ void Color::gamutLchonly (float2 sincosval, float &Lprov1, float &Chprov1, const
|
|||||||
* const double wip[3][3]: matrix for working profile
|
* const double wip[3][3]: matrix for working profile
|
||||||
* bool multiThread : parallelize the loop
|
* bool multiThread : parallelize the loop
|
||||||
*/
|
*/
|
||||||
SSEFUNCTION void Color::LabGamutMunsell(float *labL, float *laba, float *labb, const int N, bool corMunsell, bool lumaMuns, bool isHLEnabled, bool gamut, const double wip[3][3])
|
void Color::LabGamutMunsell(float *labL, float *laba, float *labb, const int N, bool corMunsell, bool lumaMuns, bool isHLEnabled, bool gamut, const double wip[3][3])
|
||||||
{
|
{
|
||||||
#ifdef _DEBUG
|
#ifdef _DEBUG
|
||||||
MyTime t1e, t2e;
|
MyTime t1e, t2e;
|
||||||
|
@ -244,7 +244,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterHaarHorizontal (const
|
|||||||
* Applies a Haar filter
|
* Applies a Haar filter
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -266,11 +266,11 @@ template<typename T> void wavelet_level<T>::SynthesisFilterHaarVertical (const T
|
|||||||
* Applies a Haar filter
|
* Applies a Haar filter
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -281,7 +281,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterHaarVertical (const T
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -328,7 +328,7 @@ void wavelet_level<T>::AnalysisFilterSubsampHorizontal (T * RESTRICT srcbuffer,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<typename T> SSEFUNCTION void wavelet_level<T>::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4],
|
template<typename T> void wavelet_level<T>::AnalysisFilterSubsampVertical (T * RESTRICT srcbuffer, T * RESTRICT dstLo, T * RESTRICT dstHi, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4],
|
||||||
const int taps, const int offset, const int width, const int height, const int row)
|
const int taps, const int offset, const int width, const int height, const int row)
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -455,7 +455,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterSubsampHorizontal (T
|
|||||||
|
|
||||||
// calculate coefficients
|
// calculate coefficients
|
||||||
int shift = skip * (taps - offset - 1); //align filter with data
|
int shift = skip * (taps - offset - 1); //align filter with data
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -506,7 +506,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterSubsampHorizontal (T
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<typename T> SSEFUNCTION void wavelet_level<T>::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend)
|
template<typename T> void wavelet_level<T>::SynthesisFilterSubsampVertical (T * RESTRICT srcLo, T * RESTRICT srcHi, T * RESTRICT dst, float (* RESTRICT filterLo)[4], float (* RESTRICT filterHi)[4], const int taps, const int offset, const int width, const int srcheight, const int dstheight, const float blend)
|
||||||
{
|
{
|
||||||
|
|
||||||
/* Basic convolution code
|
/* Basic convolution code
|
||||||
@ -521,7 +521,7 @@ template<typename T> SSEFUNCTION void wavelet_level<T>::SynthesisFilterSubsampVe
|
|||||||
__m128 fourv = _mm_set1_ps(4.f);
|
__m128 fourv = _mm_set1_ps(4.f);
|
||||||
__m128 srcFactorv = _mm_set1_ps(srcFactor);
|
__m128 srcFactorv = _mm_set1_ps(srcFactor);
|
||||||
__m128 dstFactorv = _mm_set1_ps(blend);
|
__m128 dstFactorv = _mm_set1_ps(blend);
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -594,7 +594,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterSubsampVertical (T *
|
|||||||
// calculate coefficients
|
// calculate coefficients
|
||||||
int shift = skip * (taps - offset - 1); //align filter with data
|
int shift = skip * (taps - offset - 1); //align filter with data
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -630,7 +630,7 @@ template<typename T> void wavelet_level<T>::SynthesisFilterSubsampVertical (T *
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset)
|
template<typename T> template<typename E> void wavelet_level<T>::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset)
|
||||||
{
|
{
|
||||||
|
|
||||||
/* filter along rows and columns */
|
/* filter along rows and columns */
|
||||||
@ -644,7 +644,7 @@ template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::dec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -652,7 +652,7 @@ template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::dec
|
|||||||
T tmpHi[m_w] ALIGNED64;
|
T tmpHi[m_w] ALIGNED64;
|
||||||
|
|
||||||
if(subsamp_out) {
|
if(subsamp_out) {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -662,7 +662,7 @@ template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::dec
|
|||||||
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2);
|
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -678,7 +678,7 @@ template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::dec
|
|||||||
template<typename T> template<typename E> void wavelet_level<T>::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset)
|
template<typename T> template<typename E> void wavelet_level<T>::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset)
|
||||||
{
|
{
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -687,7 +687,7 @@ template<typename T> template<typename E> void wavelet_level<T>::decompose_level
|
|||||||
/* filter along rows and columns */
|
/* filter along rows and columns */
|
||||||
if(subsamp_out)
|
if(subsamp_out)
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -697,7 +697,7 @@ template<typename T> template<typename E> void wavelet_level<T>::decompose_level
|
|||||||
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2);
|
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH + taps, taps, offset, m_w, m_w2, row / 2);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -714,7 +714,7 @@ template<typename T> template<typename E> void wavelet_level<T>::decompose_level
|
|||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
|
|
||||||
template<typename T> template<typename E> SSEFUNCTION void wavelet_level<T>::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend)
|
template<typename T> template<typename E> void wavelet_level<T>::reconstruct_level(E* tmpLo, E* tmpHi, E * src, E *dst, float *filterV, float *filterH, int taps, int offset, const float blend)
|
||||||
{
|
{
|
||||||
if(memoryAllocationFailed) {
|
if(memoryAllocationFailed) {
|
||||||
return;
|
return;
|
||||||
|
@ -476,7 +476,7 @@ void CurveFactory::complexsgnCurve (bool & autili, bool & butili, bool & ccutil
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void CurveFactory::complexCurve (double ecomp, double black, double hlcompr, double hlcomprthresh,
|
void CurveFactory::complexCurve (double ecomp, double black, double hlcompr, double hlcomprthresh,
|
||||||
double shcompr, double br, double contr,
|
double shcompr, double br, double contr,
|
||||||
const std::vector<double>& curvePoints,
|
const std::vector<double>& curvePoints,
|
||||||
const std::vector<double>& curvePoints2,
|
const std::vector<double>& curvePoints2,
|
||||||
|
@ -828,7 +828,7 @@ class WeightedStdToneCurve : public ToneCurve
|
|||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
float Triangle(float refX, float refY, float X2) const;
|
float Triangle(float refX, float refY, float X2) const;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat Triangle(vfloat refX, vfloat refY, vfloat X2) const;
|
vfloat Triangle(vfloat refX, vfloat refY, vfloat X2) const;
|
||||||
#endif
|
#endif
|
||||||
public:
|
public:
|
||||||
@ -904,7 +904,7 @@ inline void StandardToneCurve::BatchApply(
|
|||||||
// If we get to the end before getting to an aligned address, just return.
|
// If we get to the end before getting to an aligned address, just return.
|
||||||
// (Or, for non-SSE mode, if we get to the end.)
|
// (Or, for non-SSE mode, if we get to the end.)
|
||||||
return;
|
return;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
|
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
|
||||||
// Otherwise, we get to the first aligned address; go to the SSE part.
|
// Otherwise, we get to the first aligned address; go to the SSE part.
|
||||||
break;
|
break;
|
||||||
@ -916,7 +916,7 @@ inline void StandardToneCurve::BatchApply(
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for (; i + 3 < end; i += 4) {
|
for (; i + 3 < end; i += 4) {
|
||||||
__m128 r_val = LVF(r[i]);
|
__m128 r_val = LVF(r[i]);
|
||||||
__m128 g_val = LVF(g[i]);
|
__m128 g_val = LVF(g[i]);
|
||||||
@ -1007,7 +1007,7 @@ inline float WeightedStdToneCurve::Triangle(float a, float a1, float b) const
|
|||||||
return a1;
|
return a1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) const
|
inline vfloat WeightedStdToneCurve::Triangle(vfloat a, vfloat a1, vfloat b) const
|
||||||
{
|
{
|
||||||
vfloat a2 = a1 - a;
|
vfloat a2 = a1 - a;
|
||||||
@ -1061,7 +1061,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
|
|||||||
// If we get to the end before getting to an aligned address, just return.
|
// If we get to the end before getting to an aligned address, just return.
|
||||||
// (Or, for non-SSE mode, if we get to the end.)
|
// (Or, for non-SSE mode, if we get to the end.)
|
||||||
return;
|
return;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
|
} else if (reinterpret_cast<uintptr_t>(&r[i]) % 16 == 0) {
|
||||||
// Otherwise, we get to the first aligned address; go to the SSE part.
|
// Otherwise, we get to the first aligned address; go to the SSE part.
|
||||||
break;
|
break;
|
||||||
@ -1071,7 +1071,7 @@ inline void WeightedStdToneCurve::BatchApply(const size_t start, const size_t en
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
const vfloat c65535v = F2V(65535.f);
|
const vfloat c65535v = F2V(65535.f);
|
||||||
const vfloat zd5v = F2V(0.5f);
|
const vfloat zd5v = F2V(0.5f);
|
||||||
const vfloat zd25v = F2V(0.25f);
|
const vfloat zd25v = F2V(0.25f);
|
||||||
|
@ -1314,7 +1314,7 @@ void RawImageSource::jdl_interpolate_omp() // from "Lassus"
|
|||||||
// Adapted to RawTherapee by Jacques Desmis 3/2013
|
// Adapted to RawTherapee by Jacques Desmis 3/2013
|
||||||
// Improved speed and reduced memory consumption by Ingo Weyrich 2/2015
|
// Improved speed and reduced memory consumption by Ingo Weyrich 2/2015
|
||||||
//TODO Tiles to reduce memory consumption
|
//TODO Tiles to reduce memory consumption
|
||||||
SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array2D<float> &rawData, array2D<float> &red, array2D<float> &green, array2D<float> &blue, int iterations)
|
void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array2D<float> &rawData, array2D<float> &red, array2D<float> &green, array2D<float> &blue, int iterations)
|
||||||
{
|
{
|
||||||
const int width = winw, height = winh;
|
const int width = winw, height = winh;
|
||||||
const int ba = 10;
|
const int ba = 10;
|
||||||
@ -1946,7 +1946,7 @@ SSEFUNCTION void RawImageSource::lmmse_interpolate_omp(int winw, int winh, array
|
|||||||
// SSE version by Ingo Weyrich 5/2013
|
// SSE version by Ingo Weyrich 5/2013
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#define CLIPV(a) LIMV(a,zerov,c65535v)
|
#define CLIPV(a) LIMV(a,zerov,c65535v)
|
||||||
SSEFUNCTION void RawImageSource::igv_interpolate(int winw, int winh)
|
void RawImageSource::igv_interpolate(int winw, int winh)
|
||||||
{
|
{
|
||||||
static const float eps = 1e-5f, epssq = 1e-5f; //mod epssq -10f =>-5f Jacques 3/2013 to prevent artifact (divide by zero)
|
static const float eps = 1e-5f, epssq = 1e-5f; //mod epssq -10f =>-5f Jacques 3/2013 to prevent artifact (divide by zero)
|
||||||
|
|
||||||
@ -2873,7 +2873,7 @@ void RawImageSource::nodemosaic(bool bw)
|
|||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#define CLIPV(a) LIMV(a,ZEROV,c65535v)
|
#define CLIPV(a) LIMV(a,ZEROV,c65535v)
|
||||||
#endif
|
#endif
|
||||||
SSEFUNCTION void RawImageSource::refinement(int PassCount)
|
void RawImageSource::refinement(int PassCount)
|
||||||
{
|
{
|
||||||
MyTime t1e, t2e;
|
MyTime t1e, t2e;
|
||||||
t1e.set();
|
t1e.set();
|
||||||
@ -3870,7 +3870,7 @@ void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat zd5v = F2V(0.5f);
|
vfloat zd5v = F2V(0.5f);
|
||||||
vfloat c116v = F2V(116.f);
|
vfloat c116v = F2V(116.f);
|
||||||
vfloat c16v = F2V(16.f);
|
vfloat c16v = F2V(16.f);
|
||||||
@ -3887,7 +3887,7 @@ void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b
|
|||||||
|
|
||||||
for(int i = 0; i < height; i++) {
|
for(int i = 0; i < height; i++) {
|
||||||
int j = 0;
|
int j = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ ) // vectorized LUT access is restricted to __x86_64__ => we have to use the same restriction
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for(; j < labWidth - 3; j += 4) {
|
for(; j < labWidth - 3; j += 4) {
|
||||||
vfloat redv, greenv, bluev;
|
vfloat redv, greenv, bluev;
|
||||||
|
@ -40,7 +40,7 @@ extern const Settings* settings;
|
|||||||
|
|
||||||
//sequence of scales
|
//sequence of scales
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, const double * mult, const double dirpyrThreshold, const double skinprot, float b_l, float t_l, float t_r, int scaleprev)
|
void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidth, int srcheight, float ** l_a, float ** l_b, const double * mult, const double dirpyrThreshold, const double skinprot, float b_l, float t_l, float t_r, int scaleprev)
|
||||||
{
|
{
|
||||||
int lastlevel = maxlevel;
|
int lastlevel = maxlevel;
|
||||||
|
|
||||||
@ -380,7 +380,7 @@ void ImProcFunctions :: dirpyr_equalizercam (CieImage *ncie, float ** src, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale)
|
void ImProcFunctions::dirpyr_channel(float ** data_fine, float ** data_coarse, int width, int height, int level, int scale)
|
||||||
{
|
{
|
||||||
// scale is spacing of directional averaging weights
|
// scale is spacing of directional averaging weights
|
||||||
// calculate weights, compute directionally weighted average
|
// calculate weights, compute directionally weighted average
|
||||||
|
@ -52,7 +52,7 @@ LUTf RawImageSource::initInvGrad()
|
|||||||
#endif
|
#endif
|
||||||
//LUTf RawImageSource::invGrad = RawImageSource::initInvGrad();
|
//LUTf RawImageSource::invGrad = RawImageSource::initInvGrad();
|
||||||
|
|
||||||
SSEFUNCTION void RawImageSource::fast_demosaic()
|
void RawImageSource::fast_demosaic()
|
||||||
{
|
{
|
||||||
|
|
||||||
double progress = 0.0;
|
double progress = 0.0;
|
||||||
|
@ -229,7 +229,7 @@ template<class T> void gaussHorizontal3 (T** src, T** dst, int W, int H, const f
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<class T> SSEFUNCTION void gaussVertical3 (T** src, T** dst, int W, int H, const float c0, const float c1)
|
template<class T> void gaussVertical3 (T** src, T** dst, int W, int H, const float c0, const float c1)
|
||||||
{
|
{
|
||||||
vfloat Tv = F2V(0.f), Tm1v, Tp1v;
|
vfloat Tv = F2V(0.f), Tm1v, Tp1v;
|
||||||
vfloat Tv1 = F2V(0.f), Tm1v1, Tp1v1;
|
vfloat Tv1 = F2V(0.f), Tm1v1, Tp1v1;
|
||||||
@ -314,7 +314,7 @@ template<class T> void gaussVertical3 (T** src, T** dst, int W, int H, const flo
|
|||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
// fast gaussian approximation if the support window is large
|
// fast gaussian approximation if the support window is large
|
||||||
template<class T> SSEFUNCTION void gaussHorizontalSse (T** src, T** dst, const int W, const int H, const float sigma)
|
template<class T> void gaussHorizontalSse (T** src, T** dst, const int W, const int H, const float sigma)
|
||||||
{
|
{
|
||||||
double b1, b2, b3, B, M[3][3];
|
double b1, b2, b3, B, M[3][3];
|
||||||
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
||||||
@ -474,7 +474,7 @@ template<class T> void gaussHorizontal (T** src, T** dst, const int W, const int
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<class T> SSEFUNCTION void gaussVerticalSse (T** src, T** dst, const int W, const int H, const float sigma)
|
template<class T> void gaussVerticalSse (T** src, T** dst, const int W, const int H, const float sigma)
|
||||||
{
|
{
|
||||||
double b1, b2, b3, B, M[3][3];
|
double b1, b2, b3, B, M[3][3];
|
||||||
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
||||||
@ -617,7 +617,7 @@ template<class T> SSEFUNCTION void gaussVerticalSse (T** src, T** dst, const int
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
template<class T> SSEFUNCTION void gaussVerticalSsemult (T** RESTRICT src, T** RESTRICT dst, const int W, const int H, const float sigma)
|
template<class T> void gaussVerticalSsemult (T** RESTRICT src, T** RESTRICT dst, const int W, const int H, const float sigma)
|
||||||
{
|
{
|
||||||
double b1, b2, b3, B, M[3][3];
|
double b1, b2, b3, B, M[3][3];
|
||||||
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
||||||
@ -758,7 +758,7 @@ template<class T> SSEFUNCTION void gaussVerticalSsemult (T** RESTRICT src, T** R
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T> SSEFUNCTION void gaussVerticalSsediv (T** RESTRICT src, T** RESTRICT dst, T** divBuffer, const int W, const int H, const float sigma)
|
template<class T> void gaussVerticalSsediv (T** RESTRICT src, T** RESTRICT dst, T** divBuffer, const int W, const int H, const float sigma)
|
||||||
{
|
{
|
||||||
double b1, b2, b3, B, M[3][3];
|
double b1, b2, b3, B, M[3][3];
|
||||||
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
calculateYvVFactors<double>(sigma, b1, b2, b3, B, M);
|
||||||
|
@ -29,29 +29,13 @@ typedef __m128 vfloat;
|
|||||||
typedef __m128i vint2;
|
typedef __m128i vint2;
|
||||||
|
|
||||||
//
|
//
|
||||||
#ifdef __GNUC__
|
|
||||||
#if (!defined(WIN32) || defined( __x86_64__ ))
|
|
||||||
#define LVF(x) _mm_load_ps((float*)&x)
|
#define LVF(x) _mm_load_ps((float*)&x)
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
#define LVFU(x) _mm_loadu_ps(&x)
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
#define STVF(x,y) _mm_store_ps(&x,y)
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
||||||
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
||||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also need to map the aligned functions to unaligned functions for WIN32 builds
|
|
||||||
#define LVF(x) _mm_loadu_ps((float*)&x)
|
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
|
||||||
#define STVF(x,y) _mm_storeu_ps(&x,y)
|
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
|
||||||
#define LVI(x) _mm_loadu_si128((__m128i*)&x)
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define LVF(x) _mm_load_ps((float*)&x)
|
|
||||||
#define LVFU(x) _mm_loadu_ps(&x)
|
|
||||||
#define STVF(x,y) _mm_store_ps(&x,y)
|
|
||||||
#define STVFU(x,y) _mm_storeu_ps(&x,y)
|
|
||||||
#define LVI(x) _mm_load_si128((__m128i*)&x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__x86_64__) && defined(__AVX__)
|
#ifdef __AVX__
|
||||||
#define PERMUTEPS(a,mask) _mm_permute_ps(a,mask)
|
#define PERMUTEPS(a,mask) _mm_permute_ps(a,mask)
|
||||||
#else
|
#else
|
||||||
#define PERMUTEPS(a,mask) _mm_shuffle_ps(a,a,mask)
|
#define PERMUTEPS(a,mask) _mm_shuffle_ps(a,a,mask)
|
||||||
@ -67,7 +51,7 @@ static INLINE vfloat LC2VFU(float &a)
|
|||||||
|
|
||||||
|
|
||||||
// Store a vector of 4 floats in a[0],a[2],a[4] and a[6]
|
// Store a vector of 4 floats in a[0],a[2],a[4] and a[6]
|
||||||
#if defined(__x86_64__) && defined(__SSE4_1__)
|
#ifdef __SSE4_1__
|
||||||
// SSE4.1 => use _mm_blend_ps instead of _mm_set_epi32 and vself
|
// SSE4.1 => use _mm_blend_ps instead of _mm_set_epi32 and vself
|
||||||
#define STC2VFU(a,v) {\
|
#define STC2VFU(a,v) {\
|
||||||
__m128 TST1V = _mm_loadu_ps(&a);\
|
__m128 TST1V = _mm_loadu_ps(&a);\
|
||||||
|
@ -35,7 +35,7 @@ extern const Settings* settings;
|
|||||||
|
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
SSEFUNCTION void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int W, int box )
|
void RawImageSource::boxblur2(float** src, float** dst, float** temp, int H, int W, int box )
|
||||||
{
|
{
|
||||||
//box blur image channel; box size = 2*box+1
|
//box blur image channel; box size = 2*box+1
|
||||||
//horizontal blur
|
//horizontal blur
|
||||||
|
@ -53,7 +53,7 @@ using namespace rtengine;
|
|||||||
// begin of helper function for rgbProc()
|
// begin of helper function for rgbProc()
|
||||||
void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
|
void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize) {
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat cr = F2V(0.299f);
|
vfloat cr = F2V(0.299f);
|
||||||
vfloat cg = F2V(0.587f);
|
vfloat cg = F2V(0.587f);
|
||||||
vfloat cb = F2V(0.114f);
|
vfloat cb = F2V(0.114f);
|
||||||
@ -61,7 +61,7 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float
|
|||||||
|
|
||||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||||
int j = jstart, tj = 0;
|
int j = jstart, tj = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for (; j < tW - 3; j+=4, tj+=4) {
|
for (; j < tW - 3; j+=4, tj+=4) {
|
||||||
|
|
||||||
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
||||||
@ -94,14 +94,14 @@ void shadowToneCurve(const LUTf &shtonecurve, float *rtemp, float *gtemp, float
|
|||||||
|
|
||||||
void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) {
|
void highlightToneCurve(const LUTf &hltonecurve, float *rtemp, float *gtemp, float *btemp, int istart, int tH, int jstart, int tW, int tileSize, float exp_scale, float comp, float hlrange) {
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat threev = F2V(3.f);
|
vfloat threev = F2V(3.f);
|
||||||
vfloat maxvalfv = F2V(MAXVALF);
|
vfloat maxvalfv = F2V(MAXVALF);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||||
int j = jstart, tj = 0;
|
int j = jstart, tj = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for (; j < tW - 3; j+=4, tj+=4) {
|
for (; j < tW - 3; j+=4, tj+=4) {
|
||||||
|
|
||||||
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
||||||
@ -158,7 +158,7 @@ void proPhotoBlue(float *rtemp, float *gtemp, float *btemp, int istart, int tH,
|
|||||||
// this is a hack to avoid the blue=>black bug (Issue 2141)
|
// this is a hack to avoid the blue=>black bug (Issue 2141)
|
||||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||||
int j = jstart, tj = 0;
|
int j = jstart, tj = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for (; j < tW - 3; j+=4, tj+=4) {
|
for (; j < tW - 3; j+=4, tj+=4) {
|
||||||
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
vfloat rv = LVF(rtemp[ti * tileSize + tj]);
|
||||||
vfloat gv = LVF(gtemp[ti * tileSize + tj]);
|
vfloat gv = LVF(gtemp[ti * tileSize + tj]);
|
||||||
@ -3763,7 +3763,7 @@ void ImProcFunctions::rgbProc (Imagefloat* working, LabImage* lab, PipetteBuffer
|
|||||||
} else {
|
} else {
|
||||||
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
for (int i = istart, ti = 0; i < tH; i++, ti++) {
|
||||||
int j = jstart, tj = 0;
|
int j = jstart, tj = 0;
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
for (; j < tW - 3; j+=4, tj+=4) {
|
for (; j < tW - 3; j+=4, tj+=4) {
|
||||||
//brightness/contrast
|
//brightness/contrast
|
||||||
STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj])));
|
STVF(rtemp[ti * TS + tj], tonecurve(LVF(rtemp[ti * TS + tj])));
|
||||||
@ -5391,7 +5391,7 @@ void ImProcFunctions::luminanceCurve (LabImage* lold, LabImage* lnew, LUTf & cur
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW, LabImage* lold, LabImage* lnew, LUTf & acurve, LUTf & bcurve, LUTf & satcurve, LUTf & lhskcurve, LUTf & clcurve, LUTf & curve, bool utili, bool autili, bool butili, bool ccutili, bool cclutili, bool clcutili, LUTu &histCCurve, LUTu &histLCurve)
|
void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW, LabImage* lold, LabImage* lnew, LUTf & acurve, LUTf & bcurve, LUTf & satcurve, LUTf & lhskcurve, LUTf & clcurve, LUTf & curve, bool utili, bool autili, bool butili, bool ccutili, bool cclutili, bool clcutili, LUTu &histCCurve, LUTu &histLCurve)
|
||||||
{
|
{
|
||||||
if (!params->labCurve.enabled) {
|
if (!params->labCurve.enabled) {
|
||||||
return;
|
return;
|
||||||
@ -6939,7 +6939,7 @@ void ImProcFunctions::rgb2lab (const Imagefloat &src, LabImage &dst, const Glib:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::lab2rgb (const LabImage &src, Imagefloat &dst, const Glib::ustring &workingSpace)
|
void ImProcFunctions::lab2rgb (const LabImage &src, Imagefloat &dst, const Glib::ustring &workingSpace)
|
||||||
{
|
{
|
||||||
TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix ( workingSpace );
|
TMatrix wiprof = ICCStore::getInstance()->workingSpaceInverseMatrix ( workingSpace );
|
||||||
const float wip[3][3] = {
|
const float wip[3][3] = {
|
||||||
|
@ -31,7 +31,7 @@ using namespace std;
|
|||||||
namespace rtengine
|
namespace rtengine
|
||||||
{
|
{
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
||||||
{
|
{
|
||||||
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// impulse noise removal
|
// impulse noise removal
|
||||||
@ -260,7 +260,7 @@ SSEFUNCTION void ImProcFunctions::impulse_nr (LabImage* lab, double thresh)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3])
|
void ImProcFunctions::impulse_nrcam (CieImage* ncie, double thresh, float **buffers[3])
|
||||||
{
|
{
|
||||||
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
// %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// impulse noise removal
|
// impulse noise removal
|
||||||
|
@ -178,7 +178,7 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::Lanczos (const LabImage* src, LabImage* dst, float scale)
|
void ImProcFunctions::Lanczos (const LabImage* src, LabImage* dst, float scale)
|
||||||
{
|
{
|
||||||
const float delta = 1.0f / scale;
|
const float delta = 1.0f / scale;
|
||||||
const float a = 3.0f;
|
const float a = 3.0f;
|
||||||
|
@ -34,7 +34,7 @@ namespace rtengine
|
|||||||
#define ABS(a) ((a)<0?-(a):(a))
|
#define ABS(a) ((a)<0?-(a):(a))
|
||||||
|
|
||||||
extern const Settings* settings;
|
extern const Settings* settings;
|
||||||
SSEFUNCTION void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H)
|
void ImProcFunctions::dcdamping (float** aI, float** aO, float damping, int W, int H)
|
||||||
{
|
{
|
||||||
|
|
||||||
const float dampingFac = -2.0 / (damping * damping);
|
const float dampingFac = -2.0 / (damping * damping);
|
||||||
|
@ -140,7 +140,7 @@ struct cont_params {
|
|||||||
int wavNestedLevels = 1;
|
int wavNestedLevels = 1;
|
||||||
|
|
||||||
|
|
||||||
SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip)
|
void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip)
|
||||||
|
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -645,7 +645,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
numthreads = MIN(numthreads, maxnumberofthreadsforwavelet);
|
numthreads = MIN(numthreads, maxnumberofthreadsforwavelet);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
wavNestedLevels = omp_get_max_threads() / numthreads;
|
wavNestedLevels = omp_get_max_threads() / numthreads;
|
||||||
bool oldNested = omp_get_nested();
|
bool oldNested = omp_get_nested();
|
||||||
|
|
||||||
@ -720,7 +720,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
Lold = lab->L;
|
Lold = lab->L;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -784,7 +784,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -868,7 +868,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
if(!Ldecomp->memoryAllocationFailed) {
|
if(!Ldecomp->memoryAllocationFailed) {
|
||||||
|
|
||||||
float madL[8][3];
|
float madL[8][3];
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1076,7 +1076,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
|
|
||||||
bool highlight = params->toneCurve.hrenabled;
|
bool highlight = params->toneCurve.hrenabled;
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic,16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for schedule(dynamic,16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1246,7 +1246,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
|||||||
delete [] varchro;
|
delete [] varchro;
|
||||||
|
|
||||||
}
|
}
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
omp_set_nested(oldNested);
|
omp_set_nested(oldNested);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1276,12 +1276,12 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver
|
|||||||
float thres = 5.f;//different fom zero to take into account only data large enough
|
float thres = 5.f;//different fom zero to take into account only data large enough
|
||||||
max = 0.f;
|
max = 0.f;
|
||||||
min = 0.f;
|
min = 0.f;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
float lmax = 0.f, lmin = 0.f;
|
float lmax = 0.f, lmin = 0.f;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for reduction(+:averaP,averaN,countP,countN) nowait
|
#pragma omp for reduction(+:averaP,averaN,countP,countN) nowait
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1305,7 +1305,7 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp critical
|
#pragma omp critical
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -1335,7 +1335,7 @@ void ImProcFunctions::Sigma( float * RESTRICT DataList, int datalen, float aver
|
|||||||
float variP = 0.f, variN = 0.f;
|
float variP = 0.f, variN = 0.f;
|
||||||
float thres = 5.f;//different fom zero to take into account only data large enough
|
float thres = 5.f;//different fom zero to take into account only data large enough
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for reduction(+:variP,variN,countP,countN) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for reduction(+:variP,variN,countP,countN) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1434,7 +1434,7 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr
|
|||||||
}
|
}
|
||||||
|
|
||||||
memcpy(Contrast, Source, n * sizeof(float));
|
memcpy(Contrast, Source, n * sizeof(float));
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1445,19 +1445,19 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr
|
|||||||
return Contrast;
|
return Contrast;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed)
|
float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed)
|
||||||
{
|
{
|
||||||
|
|
||||||
const float eps = 0.000001f;
|
const float eps = 0.000001f;
|
||||||
int n = W_L * H_L;
|
int n = W_L * H_L;
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
__m128 epsv = _mm_set1_ps( eps );
|
__m128 epsv = _mm_set1_ps( eps );
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1471,7 +1471,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1508,7 +1508,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -1516,7 +1516,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
|
|||||||
__m128 epsv = _mm_set1_ps( eps );
|
__m128 epsv = _mm_set1_ps( eps );
|
||||||
__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
|
__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
|
||||||
__m128 tempv = _mm_set1_ps( temp );
|
__m128 tempv = _mm_set1_ps( temp );
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1537,7 +1537,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1572,7 +1572,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp
|
|||||||
min0 = 0.0f;
|
min0 = 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1592,7 +1592,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp
|
|||||||
CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost, WavCoeffs_L0);
|
CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost, WavCoeffs_L0);
|
||||||
|
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1624,7 +1624,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat
|
|||||||
}
|
}
|
||||||
|
|
||||||
// max0=32768.f;
|
// max0=32768.f;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1649,7 +1649,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat
|
|||||||
epd2.CompressDynamicRange(WavCoeffs_L0, (float)sca / skip, edgest, Compression, DetailBoost, Iterates, rew);
|
epd2.CompressDynamicRange(WavCoeffs_L0, (float)sca / skip, edgest, Compression, DetailBoost, Iterates, rew);
|
||||||
|
|
||||||
//Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping.
|
//Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping.
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1692,7 +1692,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
float min0 = FLT_MAX;
|
float min0 = FLT_MAX;
|
||||||
|
|
||||||
if(contrast != 0.f || (cp.tonemap && cp.resena)) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
if(contrast != 0.f || (cp.tonemap && cp.resena)) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for reduction(+:avedbl) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for reduction(+:avedbl) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1700,14 +1700,14 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
avedbl += WavCoeffs_L0[i];
|
avedbl += WavCoeffs_L0[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
float lminL = FLT_MAX;
|
float lminL = FLT_MAX;
|
||||||
float lmaxL = 0.f;
|
float lmaxL = 0.f;
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1722,7 +1722,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp critical
|
#pragma omp critical
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -1782,13 +1782,13 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
koeLi[j][i] = 0.f;
|
koeLi[j][i] = 0.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
if(contrast != 0.f && cp.resena) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
if(contrast != 0.f && cp.resena) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||||
{
|
{
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1817,18 +1817,18 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
if(cp.tonemap && cp.contmet == 1 && cp.resena) {
|
if(cp.tonemap && cp.contmet == 1 && cp.resena) {
|
||||||
float maxp = max0 * 256.f;
|
float maxp = max0 * 256.f;
|
||||||
float minp = min0 * 256.f;
|
float minp = min0 * 256.f;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp single
|
#pragma omp single
|
||||||
#endif
|
#endif
|
||||||
ContrastResid(WavCoeffs_L0, cp, W_L, H_L, maxp, minp);
|
ContrastResid(WavCoeffs_L0, cp, W_L, H_L, maxp, minp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp barrier
|
#pragma omp barrier
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if((cp.conres != 0.f || cp.conresH != 0.f) && cp.resena) { // cp.conres = 0.f and cp.comresH = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
if((cp.conres != 0.f || cp.conresH != 0.f) && cp.resena) { // cp.conres = 0.f and cp.comresH = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1890,7 +1890,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
tmC[i] = &tmCBuffer[i * W_L];
|
tmC[i] = &tmCBuffer[i * W_L];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -1910,7 +1910,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
float aamp = 1.f + cp.eddetthrHi / 100.f;
|
float aamp = 1.f + cp.eddetthrHi / 100.f;
|
||||||
|
|
||||||
for (int lvl = 0; lvl < 4; lvl++) {
|
for (int lvl = 0; lvl < 4; lvl++) {
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic,16)
|
#pragma omp for schedule(dynamic,16)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2013,7 +2013,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
|
|||||||
// end
|
// end
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2048,7 +2048,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
|
|||||||
|
|
||||||
float * WavCoeffs_a0 = WaveletCoeffs_a.coeff0;
|
float * WavCoeffs_a0 = WaveletCoeffs_a.coeff0;
|
||||||
float * WavCoeffs_b0 = WaveletCoeffs_b.coeff0;
|
float * WavCoeffs_b0 = WaveletCoeffs_b.coeff0;
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
@ -2056,7 +2056,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
|
|||||||
float huebuffer[W_L] ALIGNED64;
|
float huebuffer[W_L] ALIGNED64;
|
||||||
float chrbuffer[W_L] ALIGNED64;
|
float chrbuffer[W_L] ALIGNED64;
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic,16)
|
#pragma omp for schedule(dynamic,16)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2116,13 +2116,13 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
|
|||||||
|
|
||||||
float * WavCoeffs_ab0 = WaveletCoeffs_ab.coeff0;
|
float * WavCoeffs_ab0 = WaveletCoeffs_ab.coeff0;
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
if(cp.chrores != 0.f && cp.resena) { // cp.chrores == 0.f means all will be multiplied by 1.f, so we can skip the processing of residual
|
if(cp.chrores != 0.f && cp.resena) { // cp.chrores == 0.f means all will be multiplied by 1.f, so we can skip the processing of residual
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2175,7 +2175,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
|
|||||||
|
|
||||||
if(cp.cbena && cp.resena) {//if user select Toning and color balance
|
if(cp.cbena && cp.resena) {//if user select Toning and color balance
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for nowait
|
#pragma omp for nowait
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2230,7 +2230,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for schedule(dynamic) collapse(2)
|
#pragma omp for schedule(dynamic) collapse(2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2425,7 +2425,7 @@ void ImProcFunctions::finalContAllL (float ** WavCoeffs_L, float * WavCoeffs_L0,
|
|||||||
float bsig = 0.5f - asig * mean[level];
|
float bsig = 0.5f - asig * mean[level];
|
||||||
float amean = 0.5f / mean[level];
|
float amean = 0.5f / mean[level];
|
||||||
|
|
||||||
#ifdef _RT_NESTED_OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for schedule(dynamic, W_L * 16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
#pragma omp parallel for schedule(dynamic, W_L * 16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1131,7 +1131,7 @@ void rtengine::LCPMapper::correctCA(double& x, double& y, int cx, int cy, int ch
|
|||||||
y -= cy;
|
y -= cy;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void rtengine::LCPMapper::processVignetteLine(int width, int y, float* line) const
|
void rtengine::LCPMapper::processVignetteLine(int width, int y, float* line) const
|
||||||
{
|
{
|
||||||
// No need for swapXY, since vignette is in RAW and always before rotation
|
// No need for swapXY, since vignette is in RAW and always before rotation
|
||||||
float yd = ((float)y - mc.y0) * mc.rfy;
|
float yd = ((float)y - mc.y0) * mc.rfy;
|
||||||
@ -1169,7 +1169,7 @@ SSEFUNCTION void rtengine::LCPMapper::processVignetteLine(int width, int y, floa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void rtengine::LCPMapper::processVignetteLine3Channels(int width, int y, float* line) const
|
void rtengine::LCPMapper::processVignetteLine3Channels(int width, int y, float* line) const
|
||||||
{
|
{
|
||||||
// No need for swapXY, since vignette is in RAW and always before rotation
|
// No need for swapXY, since vignette is in RAW and always before rotation
|
||||||
float yd = ((float)y - mc.y0) * mc.rfy;
|
float yd = ((float)y - mc.y0) * mc.rfy;
|
||||||
|
@ -26,44 +26,14 @@
|
|||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#include "sleefsseavx.c"
|
#include "sleefsseavx.c"
|
||||||
#ifdef __GNUC__
|
|
||||||
#if defined(WIN32) && !defined( __x86_64__ )
|
|
||||||
// needed for actual versions of GCC with 32-Bit Windows
|
|
||||||
#define SSEFUNCTION __attribute__((force_align_arg_pointer))
|
|
||||||
#else
|
|
||||||
#define SSEFUNCTION
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define SSEFUNCTION
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#ifdef __SSE__
|
|
||||||
#ifdef __GNUC__
|
|
||||||
#if defined(WIN32) && !defined( __x86_64__ )
|
|
||||||
// needed for actual versions of GCC with 32-Bit Windows
|
|
||||||
#define SSEFUNCTION __attribute__((force_align_arg_pointer))
|
|
||||||
#else
|
|
||||||
#define SSEFUNCTION
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define SSEFUNCTION
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define SSEFUNCTION
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
#define RESTRICT __restrict__
|
#define RESTRICT __restrict__
|
||||||
#define LIKELY(x) __builtin_expect (!!(x), 1)
|
#define LIKELY(x) __builtin_expect (!!(x), 1)
|
||||||
#define UNLIKELY(x) __builtin_expect (!!(x), 0)
|
#define UNLIKELY(x) __builtin_expect (!!(x), 0)
|
||||||
#if (!defined(WIN32) || defined( __x86_64__ ))
|
#define ALIGNED64 __attribute__ ((aligned (64)))
|
||||||
#define ALIGNED64 __attribute__ ((aligned (64)))
|
#define ALIGNED16 __attribute__ ((aligned (16)))
|
||||||
#define ALIGNED16 __attribute__ ((aligned (16)))
|
|
||||||
#else // there is a bug in gcc 4.7.x when using openmp and aligned memory and -O3, also needed for WIN32 builds
|
|
||||||
#define ALIGNED64
|
|
||||||
#define ALIGNED16
|
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
#define RESTRICT
|
#define RESTRICT
|
||||||
#define LIKELY(x) (x)
|
#define LIKELY(x) (x)
|
||||||
@ -71,7 +41,4 @@
|
|||||||
#define ALIGNED64
|
#define ALIGNED64
|
||||||
#define ALIGNED16
|
#define ALIGNED16
|
||||||
#endif
|
#endif
|
||||||
#if defined _OPENMP
|
|
||||||
#define _RT_NESTED_OPENMP
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1341,7 +1341,7 @@ int RawImageSource::interpolateBadPixelsXtrans( PixelsMap &bitmapBads )
|
|||||||
* (Taken from Emil Martinec idea)
|
* (Taken from Emil Martinec idea)
|
||||||
* (Optimized by Ingo Weyrich 2013 and 2015)
|
* (Optimized by Ingo Weyrich 2013 and 2015)
|
||||||
*/
|
*/
|
||||||
SSEFUNCTION int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thresh, bool findHotPixels, bool findDeadPixels )
|
int RawImageSource::findHotDeadPixels( PixelsMap &bpMap, float thresh, bool findHotPixels, bool findDeadPixels )
|
||||||
{
|
{
|
||||||
float varthresh = (20.0 * (thresh / 100.0) + 1.0 ) / 24.f;
|
float varthresh = (20.0 * (thresh / 100.0) + 1.0 ) / 24.f;
|
||||||
|
|
||||||
@ -3169,7 +3169,7 @@ void RawImageSource::copyOriginalPixels(const RAWParams &raw, RawImage *src, Raw
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, const int boxH, const int boxW)
|
void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, const int boxH, const int boxW)
|
||||||
{
|
{
|
||||||
|
|
||||||
if(boxW == 0 && boxH == 0) { // nothing to blur
|
if(boxW == 0 && boxH == 0) { // nothing to blur
|
||||||
|
@ -358,7 +358,7 @@ void SHMap::forceStat (float max_, float min_, float avg_)
|
|||||||
avg = avg_;
|
avg = avg_;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, int height, LUTf & rangefn, int level, int scale)
|
void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, int width, int height, LUTf & rangefn, int level, int scale)
|
||||||
{
|
{
|
||||||
//scale is spacing of directional averaging weights
|
//scale is spacing of directional averaging weights
|
||||||
|
|
||||||
@ -375,7 +375,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
|||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat dirwtv, valv, normv, dftemp1v, dftemp2v;
|
vfloat dirwtv, valv, normv, dftemp1v, dftemp2v;
|
||||||
#endif // __SSE2__
|
#endif // __SSE2__
|
||||||
int j;
|
int j;
|
||||||
@ -402,7 +402,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
|||||||
data_coarse[i][j] = val / norm; // low pass filter
|
data_coarse[i][j] = val / norm; // low pass filter
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
int inbrMin = max(i - scalewin, i % scale);
|
int inbrMin = max(i - scalewin, i % scale);
|
||||||
|
|
||||||
for(; j < (width - scalewin) - 3; j += 4) {
|
for(; j < (width - scalewin) - 3; j += 4) {
|
||||||
@ -482,7 +482,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
|||||||
#pragma omp parallel
|
#pragma omp parallel
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
vfloat dirwtv, valv, normv, dftemp1v, dftemp2v;
|
vfloat dirwtv, valv, normv, dftemp1v, dftemp2v;
|
||||||
float domkerv[5][5][4] ALIGNED16 = {{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}};
|
float domkerv[5][5][4] ALIGNED16 = {{{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {2, 2, 2, 2}, {2, 2, 2, 2}, {2, 2, 2, 2}, {1, 1, 1, 1}}, {{1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}, {1, 1, 1, 1}}};
|
||||||
|
|
||||||
@ -510,7 +510,7 @@ SSEFUNCTION void SHMap::dirpyr_shmap(float ** data_fine, float ** data_coarse, i
|
|||||||
data_coarse[i][j] = val / norm; // low pass filter
|
data_coarse[i][j] = val / norm; // low pass filter
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined( __SSE2__ ) && defined( __x86_64__ )
|
#ifdef __SSE2__
|
||||||
|
|
||||||
for(; j < width - scalewin - 3; j += 4) {
|
for(; j < width - scalewin - 3; j += 4) {
|
||||||
valv = _mm_setzero_ps();
|
valv = _mm_setzero_ps();
|
||||||
|
@ -906,7 +906,7 @@ typedef struct {
|
|||||||
static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
|
static INLINE vfloat vabsf(vfloat f) { return (vfloat)vandnotm((vmask)vcast_vf_f(-0.0f), (vmask)f); }
|
||||||
static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); }
|
static INLINE vfloat vnegf(vfloat f) { return (vfloat)vxorm((vmask)f, (vmask)vcast_vf_f(-0.0f)); }
|
||||||
|
|
||||||
#if defined( __SSE4_1__ ) && defined( __x86_64__ )
|
#ifdef __SSE4_1__
|
||||||
// only one instruction when using SSE4.1
|
// only one instruction when using SSE4.1
|
||||||
static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) {
|
static INLINE vfloat vself(vmask mask, vfloat x, vfloat y) {
|
||||||
return _mm_blendv_ps(y,x,(vfloat)mask);
|
return _mm_blendv_ps(y,x,(vfloat)mask);
|
||||||
|
@ -73,14 +73,8 @@ void BatchQueueEntryUpdater::process (guint8* oimg, int ow, int oh, int newh, BQ
|
|||||||
stopped = false;
|
stopped = false;
|
||||||
tostop = false;
|
tostop = false;
|
||||||
|
|
||||||
#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && defined( WIN32 ) && defined(__x86_64__)
|
|
||||||
#undef THREAD_PRIORITY_NORMAL
|
|
||||||
// See Issue 2384 comment #3
|
|
||||||
thread = Glib::Thread::create(sigc::mem_fun(*this, &BatchQueueEntryUpdater::processThread), (unsigned long int)0, true, true, Glib::THREAD_PRIORITY_NORMAL);
|
|
||||||
#else
|
|
||||||
#undef THREAD_PRIORITY_LOW
|
#undef THREAD_PRIORITY_LOW
|
||||||
thread = Glib::Thread::create(sigc::mem_fun(*this, &BatchQueueEntryUpdater::processThread), (unsigned long int)0, true, true, Glib::THREAD_PRIORITY_LOW);
|
thread = Glib::Thread::create(sigc::mem_fun(*this, &BatchQueueEntryUpdater::processThread), (unsigned long int)0, true, true, Glib::THREAD_PRIORITY_LOW);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -944,7 +944,7 @@ void HistogramArea::update (LUTu &histRed, LUTu &histGreen, LUTu &histBlue, LUTu
|
|||||||
idle_register.add(func, haih);
|
idle_register.add(func, haih);
|
||||||
}
|
}
|
||||||
|
|
||||||
SSEFUNCTION void HistogramArea::updateBackBuffer ()
|
void HistogramArea::updateBackBuffer ()
|
||||||
{
|
{
|
||||||
|
|
||||||
if (!get_realized ()) {
|
if (!get_realized ()) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user