Removed most of the __x86_64__ dependencies. Also removed _RT_NESTED_OPENMP

This commit is contained in:
heckflosse
2018-01-23 16:05:31 +01:00
parent 4046a3c9b9
commit 96bf06a950
28 changed files with 171 additions and 226 deletions

View File

@@ -140,7 +140,7 @@ struct cont_params {
int wavNestedLevels = 1;
SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip)
void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int kall, const procparams::WaveletParams & waparams, const WavCurve & wavCLVCcurve, const WavOpacityCurveRG & waOpacityCurveRG, const WavOpacityCurveBY & waOpacityCurveBY, const WavOpacityCurveW & waOpacityCurveW, const WavOpacityCurveWL & waOpacityCurveWL, LUTf &wavclCurve, int skip)
{
@@ -645,7 +645,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
numthreads = MIN(numthreads, maxnumberofthreadsforwavelet);
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
wavNestedLevels = omp_get_max_threads() / numthreads;
bool oldNested = omp_get_nested();
@@ -720,7 +720,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
Lold = lab->L;
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -784,7 +784,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
}
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -868,7 +868,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
if(!Ldecomp->memoryAllocationFailed) {
float madL[8][3];
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -1076,7 +1076,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
bool highlight = params->toneCurve.hrenabled;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -1246,7 +1246,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
delete [] varchro;
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
omp_set_nested(oldNested);
#endif
@@ -1276,12 +1276,12 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver
float thres = 5.f;//different from zero to take into account only data large enough
max = 0.f;
min = 0.f;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
{
float lmax = 0.f, lmin = 0.f;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for reduction(+:averaP,averaN,countP,countN) nowait
#endif
@@ -1305,7 +1305,7 @@ void ImProcFunctions::Aver( float * RESTRICT DataList, int datalen, float &aver
}
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp critical
#endif
{
@@ -1335,7 +1335,7 @@ void ImProcFunctions::Sigma( float * RESTRICT DataList, int datalen, float aver
float variP = 0.f, variN = 0.f;
float thres = 5.f;//different from zero to take into account only data large enough
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for reduction(+:variP,variN,countP,countN) num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -1434,7 +1434,7 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr
}
memcpy(Contrast, Source, n * sizeof(float));
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for
#endif
@@ -1445,19 +1445,19 @@ float *ImProcFunctions::ContrastDR(float *Source, int W_L, int H_L, float *Contr
return Contrast;
}
SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed)
float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L, float Compression, float DetailBoost, float *Compressed)
{
const float eps = 0.000001f;
int n = W_L * H_L;
#ifdef __SSE2__
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel
#endif
{
__m128 epsv = _mm_set1_ps( eps );
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1471,7 +1471,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
}
#else
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for
#endif
@@ -1508,7 +1508,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
}
#ifdef __SSE2__
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel
#endif
{
@@ -1516,7 +1516,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
__m128 epsv = _mm_set1_ps( eps );
__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
__m128 tempv = _mm_set1_ps( temp );
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1537,7 +1537,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int W_L, int H_L,
}
#else
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for
#endif
@@ -1572,7 +1572,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp
min0 = 0.0f;
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for
#endif
@@ -1592,7 +1592,7 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, struct cont_params &cp
CompressDR(WavCoeffs_L0, W_L, H_L, Compression, DetailBoost, WavCoeffs_L0);
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for // removed schedule(dynamic,10)
#endif
@@ -1624,7 +1624,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat
}
// max0=32768.f;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for
#endif
@@ -1649,7 +1649,7 @@ void ImProcFunctions::EPDToneMapResid(float * WavCoeffs_L0, unsigned int Iterat
epd2.CompressDynamicRange(WavCoeffs_L0, (float)sca / skip, edgest, Compression, DetailBoost, Iterates, rew);
//Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping.
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for // removed schedule(dynamic,10)
#endif
@@ -1692,7 +1692,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
float min0 = FLT_MAX;
if(contrast != 0.f || (cp.tonemap && cp.resena)) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for reduction(+:avedbl) num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
@@ -1700,14 +1700,14 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
avedbl += WavCoeffs_L0[i];
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
{
float lminL = FLT_MAX;
float lmaxL = 0.f;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1722,7 +1722,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp critical
#endif
{
@@ -1782,13 +1782,13 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
koeLi[j][i] = 0.f;
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
{
if(contrast != 0.f && cp.resena) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
{
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for
#endif
@@ -1817,18 +1817,18 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
if(cp.tonemap && cp.contmet == 1 && cp.resena) {
float maxp = max0 * 256.f;
float minp = min0 * 256.f;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp single
#endif
ContrastResid(WavCoeffs_L0, cp, W_L, H_L, maxp, minp);
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp barrier
#endif
if((cp.conres != 0.f || cp.conresH != 0.f) && cp.resena) { // cp.conres = 0.f and cp.conresH = 0.f means that all will be multiplied by 1.f, so we can skip this step
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for nowait
#endif
@@ -1890,7 +1890,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
tmC[i] = &tmCBuffer[i * W_L];
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for schedule(dynamic) collapse(2)
#endif
@@ -1910,7 +1910,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
float aamp = 1.f + cp.eddetthrHi / 100.f;
for (int lvl = 0; lvl < 4; lvl++) {
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for schedule(dynamic,16)
#endif
@@ -2013,7 +2013,7 @@ void ImProcFunctions::WaveletcontAllL(LabImage * labco, float ** varhue, float *
// end
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for schedule(dynamic) collapse(2)
#endif
@@ -2048,7 +2048,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
float * WavCoeffs_a0 = WaveletCoeffs_a.coeff0;
float * WavCoeffs_b0 = WaveletCoeffs_b.coeff0;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
{
@@ -2056,7 +2056,7 @@ void ImProcFunctions::WaveletAandBAllAB(wavelet_decomposition &WaveletCoeffs_a,
float huebuffer[W_L] ALIGNED64;
float chrbuffer[W_L] ALIGNED64;
#endif // __SSE2__
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for schedule(dynamic,16)
#endif
@@ -2116,13 +2116,13 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
float * WavCoeffs_ab0 = WaveletCoeffs_ab.coeff0;
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif
{
if(cp.chrores != 0.f && cp.resena) { // cp.chrores == 0.f means all will be multiplied by 1.f, so we can skip the processing of residual
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for nowait
#endif
@@ -2175,7 +2175,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
if(cp.cbena && cp.resena) {//if user select Toning and color balance
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for nowait
#endif
@@ -2230,7 +2230,7 @@ void ImProcFunctions::WaveletcontAllAB(LabImage * labco, float ** varhue, float
}
}
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp for schedule(dynamic) collapse(2)
#endif
@@ -2425,7 +2425,7 @@ void ImProcFunctions::finalContAllL (float ** WavCoeffs_L, float * WavCoeffs_L0,
float bsig = 0.5f - asig * mean[level];
float amean = 0.5f / mean[level];
#ifdef _RT_NESTED_OPENMP
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic, W_L * 16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
#endif