Disable nested parallelism when compiled with clang, Issue 2731
This commit is contained in:
@@ -645,7 +645,8 @@ do {
|
||||
// Calculate number of tiles. If less than omp_get_max_threads(), then limit num_threads to number of tiles
|
||||
int numthreads = MIN(numtiles,omp_get_max_threads());
|
||||
if(options.rgbDenoiseThreadLimit > 0)
|
||||
numthreads = MIN(numthreads,options.rgbDenoiseThreadLimit);
|
||||
numthreads = MIN(numthreads,options.rgbDenoiseThreadLimit);
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
denoiseNestedLevels = omp_get_max_threads() / numthreads;
|
||||
bool oldNested = omp_get_nested();
|
||||
if(denoiseNestedLevels < 2)
|
||||
@@ -654,7 +655,8 @@ do {
|
||||
omp_set_nested(true);
|
||||
if(options.rgbDenoiseThreadLimit > 0)
|
||||
while(denoiseNestedLevels*numthreads > options.rgbDenoiseThreadLimit)
|
||||
denoiseNestedLevels--;
|
||||
denoiseNestedLevels--;
|
||||
#endif
|
||||
if(settings->verbose)
|
||||
printf("RGB_denoise uses %d main thread(s) and up to %d nested thread(s) for each main thread\n",numthreads,denoiseNestedLevels);
|
||||
#endif
|
||||
@@ -736,8 +738,8 @@ do {
|
||||
|
||||
if(!denoiseMethodRgb){//lab mode
|
||||
//modification Jacques feb 2013 and july 2014
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++) {
|
||||
int i1 = i - tiletop;
|
||||
@@ -781,8 +783,8 @@ do {
|
||||
}
|
||||
}
|
||||
} else {//RGB mode
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++) {
|
||||
int i1 = i - tiletop;
|
||||
@@ -814,8 +816,8 @@ do {
|
||||
}
|
||||
}
|
||||
} else {//image is not raw; use Lab parametrization
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++) {
|
||||
int i1 = i - tiletop;
|
||||
@@ -928,7 +930,7 @@ do {
|
||||
if(!memoryAllocationFailed) {
|
||||
// precalculate madL, because it's used in adecomp and bdecomp
|
||||
int maxlvl = Ldecomp->maxlevel();
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||
@@ -1023,7 +1025,7 @@ do {
|
||||
if(!memoryAllocationFailed) {
|
||||
// copy labdn->L to Lin before it gets modified by reconstruction
|
||||
Lin = new array2D<float>(width,height);
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for(int i=0;i<height;i++)
|
||||
@@ -1134,14 +1136,14 @@ do {
|
||||
fLbloxArray[i] = (float*) fftwf_malloc(max_numblox_W*TS*TS*sizeof(float));
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
int masterThread = omp_get_thread_num();
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
int subThread = masterThread * denoiseNestedLevels + omp_get_thread_num();
|
||||
#else
|
||||
int subThread = 0;
|
||||
@@ -1151,7 +1153,7 @@ do {
|
||||
float *fLblox = fLbloxArray[subThread];
|
||||
float pBuf[width + TS + 2*blkrad*offset] ALIGNED16;
|
||||
float nbrwt[TS*TS] ALIGNED64;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int vblk=0; vblk<numblox_H; vblk++) {
|
||||
@@ -1239,7 +1241,7 @@ do {
|
||||
}
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
for (int i=0; i<height; i++) {
|
||||
@@ -1283,8 +1285,8 @@ do {
|
||||
realred /= 100.f;
|
||||
realblue /= 100.f;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for schedule(dynamic,16) num_threads(denoiseNestedLevels)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++){
|
||||
int i1 = i-tiletop;
|
||||
@@ -1329,8 +1331,8 @@ do {
|
||||
}
|
||||
}
|
||||
} else {//RGB mode
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++){
|
||||
int i1 = i-tiletop;
|
||||
@@ -1365,8 +1367,8 @@ do {
|
||||
|
||||
}
|
||||
} else {
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(denoiseNestedLevels)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++){
|
||||
int i1 = i-tiletop;
|
||||
@@ -1427,7 +1429,7 @@ do {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
omp_set_nested(oldNested);
|
||||
#endif
|
||||
//copy denoised image to output
|
||||
@@ -1854,7 +1856,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit
|
||||
maxHL = WaveletCoeffs_L.level_H(lvl);
|
||||
}
|
||||
bool memoryAllocationFailed = false;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@@ -1868,7 +1870,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkL(wavelet_decomposit
|
||||
|
||||
if(!memoryAllocationFailed) {
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=maxlvl-1; lvl>=0; lvl--) {//for levels less than max, use level diff to make edge mask
|
||||
@@ -1968,7 +1970,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
maxHL = WaveletCoeffs_L.level_H(lvl);
|
||||
}
|
||||
bool memoryAllocationFailed = false;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@@ -1983,7 +1985,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
if(!memoryAllocationFailed) {
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||
@@ -2001,7 +2003,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=maxlvl-1; lvl>=0; lvl--) {//for levels less than max, use level diff to make edge mask
|
||||
@@ -2086,7 +2088,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
maxHL = WaveletCoeffs_L.level_H(lvl);
|
||||
}
|
||||
bool memoryAllocationFailed = false;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@@ -2099,7 +2101,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
}
|
||||
|
||||
if(!memoryAllocationFailed) {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||
@@ -2130,7 +2132,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
maxHL = WaveletCoeffs_L.level_H(lvl);
|
||||
}
|
||||
bool memoryAllocationFailed = false;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(denoiseNestedLevels) if(denoiseNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@@ -2143,7 +2145,7 @@ SSEFUNCTION bool ImProcFunctions::WaveletDenoiseAll_BiShrinkAB(wavelet_decomposi
|
||||
}
|
||||
|
||||
if(!memoryAllocationFailed) {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=0; lvl<maxlvl; lvl++) {
|
||||
@@ -2589,7 +2591,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
||||
for (int i=0; i<hei; i++)
|
||||
bcalc[i] = new float[wid];
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for if(multiThread)
|
||||
#endif
|
||||
for(int ii=0;ii<hei;ii++){
|
||||
@@ -2707,7 +2709,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
||||
//fill tile from image; convert RGB to "luma/chroma"
|
||||
|
||||
if (isRAW) {//image is raw; use channel differences for chroma channels
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for if(multiThread)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i+=2) {
|
||||
@@ -2747,7 +2749,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for if(multiThread)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i+=2) {
|
||||
@@ -2762,7 +2764,7 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
||||
}
|
||||
if (!denoiseMethodRgb){//lab mode, modification Jacques feb 2013 and july 2014
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for if(multiThread)
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++) {
|
||||
@@ -2883,17 +2885,17 @@ SSEFUNCTION void ImProcFunctions::RGB_denoise_info(Imagefloat * src, Imagefloat
|
||||
schoice=2;
|
||||
|
||||
const int levwav=5;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel sections if(multiThread)
|
||||
#endif
|
||||
{
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp section
|
||||
#endif
|
||||
{
|
||||
adecomp = new wavelet_decomposition (labdn->data+datalen, labdn->W, labdn->H, levwav, 1 );
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp section
|
||||
#endif
|
||||
{
|
||||
|
||||
@@ -215,7 +215,7 @@ namespace rtengine {
|
||||
* Applies a Haar filter
|
||||
*
|
||||
*/
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
for (int k=0; k<height; k++) {
|
||||
@@ -234,18 +234,18 @@ namespace rtengine {
|
||||
* Applies a Haar filter
|
||||
*
|
||||
*/
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
{
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
for(int i = 0; i < skip; i++) {
|
||||
for(int j=0;j<width;j++)
|
||||
dst[width*i+j] = (srcLo[i*width+j] + srcHi[i*width+j]);
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int i = skip; i < height; i++) {
|
||||
@@ -394,7 +394,7 @@ namespace rtengine {
|
||||
|
||||
// calculate coefficients
|
||||
int shift = skip*(taps-offset-1);//align filter with data
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
for (int k=0; k<height; k++) {
|
||||
@@ -450,7 +450,7 @@ namespace rtengine {
|
||||
__m128 fourv = _mm_set1_ps(4.f);
|
||||
__m128 srcFactorv = _mm_set1_ps(srcFactor);
|
||||
__m128 dstFactorv = _mm_set1_ps(blend);
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
for(int i = 0; i < dstheight; i++) {
|
||||
@@ -509,7 +509,7 @@ namespace rtengine {
|
||||
// calculate coefficients
|
||||
int shift=skip*(taps-offset-1);//align filter with data
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
for(int i = 0; i < dstheight; i++) {
|
||||
@@ -550,14 +550,14 @@ namespace rtengine {
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
{
|
||||
T tmpLo[m_w] ALIGNED64;
|
||||
T tmpHi[m_w] ALIGNED64;
|
||||
if(subsamp_out) {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int row=0;row<m_h;row+=2) {
|
||||
@@ -566,7 +566,7 @@ namespace rtengine {
|
||||
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH+taps, taps, offset, m_w, m_w2, row/2);
|
||||
}
|
||||
} else {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int row=0;row<m_h;row++) {
|
||||
@@ -580,7 +580,7 @@ namespace rtengine {
|
||||
#else
|
||||
template<typename T> template<typename E> void wavelet_level<T>::decompose_level(E *src, E *dst, float *filterV, float *filterH, int taps, int offset) {
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(numThreads) if(numThreads>1)
|
||||
#endif
|
||||
{
|
||||
@@ -588,7 +588,7 @@ namespace rtengine {
|
||||
T tmpHi[m_w] ALIGNED64;
|
||||
/* filter along rows and columns */
|
||||
if(subsamp_out) {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int row=0;row<m_h;row+=2) {
|
||||
@@ -597,7 +597,7 @@ namespace rtengine {
|
||||
AnalysisFilterSubsampHorizontal (tmpHi, wavcoeffs[2], wavcoeffs[3], filterH, filterH+taps, taps, offset, m_w, m_w2, row/2);
|
||||
}
|
||||
} else {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int row=0;row<m_h;row++) {
|
||||
|
||||
@@ -438,7 +438,8 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
|
||||
numthreads = MIN(numtiles,omp_get_max_threads());
|
||||
if(maxnumberofthreadsforwavelet > 0)
|
||||
numthreads = MIN(numthreads,maxnumberofthreadsforwavelet);
|
||||
numthreads = MIN(numthreads,maxnumberofthreadsforwavelet);
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
wavNestedLevels = omp_get_max_threads() / numthreads;
|
||||
bool oldNested = omp_get_nested();
|
||||
if(wavNestedLevels < 2)
|
||||
@@ -447,7 +448,8 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
omp_set_nested(true);
|
||||
if(maxnumberofthreadsforwavelet > 0)
|
||||
while(wavNestedLevels*numthreads > maxnumberofthreadsforwavelet)
|
||||
wavNestedLevels--;
|
||||
wavNestedLevels--;
|
||||
#endif
|
||||
if(settings->verbose)
|
||||
printf("Ip Wavelet uses %d main thread(s) and up to %d nested thread(s) for each main thread\n",numthreads,wavNestedLevels);
|
||||
|
||||
@@ -493,16 +495,16 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
for (int i=0; i<tileheight; i++)
|
||||
Lold[i] = LoldBuffer + i*tilewidth;
|
||||
}
|
||||
|
||||
|
||||
} else {
|
||||
labco = new LabImage(width,height);
|
||||
Lold = lab->L;
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
|
||||
|
||||
for (int i=tiletop; i<tilebottom; i++) {
|
||||
int i1 = i - tiletop;
|
||||
int j;
|
||||
@@ -554,7 +556,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
for (int i=1; i<hei-1; i++) {
|
||||
@@ -618,7 +620,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
|
||||
float madL[8][3];
|
||||
bool memoryAllocationFailed = false;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for schedule(dynamic) collapse(2) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
for (int lvl=0; lvl<3; lvl++) {
|
||||
@@ -777,9 +779,9 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
}
|
||||
bool highlight = params->toneCurve.hrenabled;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for schedule(dynamic,16) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
#endif
|
||||
for (int i=tiletop; i<tilebottom; i++){
|
||||
int i1 = i-tiletop;
|
||||
float L,a,b;
|
||||
@@ -928,7 +930,7 @@ SSEFUNCTION void ImProcFunctions::ip_wavelet(LabImage * lab, LabImage * dst, int
|
||||
delete [] sigmaN;
|
||||
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
omp_set_nested(oldNested);
|
||||
#endif
|
||||
if(numtiles > 1) {
|
||||
@@ -936,10 +938,10 @@ omp_set_nested(oldNested);
|
||||
delete dsttmp;
|
||||
}
|
||||
|
||||
// if (settings->verbose) {
|
||||
if (settings->verbose) {
|
||||
t2e.set();
|
||||
printf("Wavelet performed in %d usec:\n", t2e.etime(t1e));
|
||||
// }
|
||||
}
|
||||
|
||||
}//end o
|
||||
|
||||
@@ -960,12 +962,12 @@ omp_set_nested(oldNested);
|
||||
float thres = 5.f;//different from zero to take into account only data large enough
|
||||
max=0.f;
|
||||
min=0.f;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
float lmax = 0.f, lmin = 0.f;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for reduction(+:averaP,averaN,countP,countN) nowait
|
||||
#endif
|
||||
for(int i=0;i<datalen;i++) {
|
||||
@@ -982,7 +984,7 @@ omp_set_nested(oldNested);
|
||||
countN++;
|
||||
}
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
@@ -1002,7 +1004,7 @@ omp_set_nested(oldNested);
|
||||
float variP = 0.f, variN = 0.f;
|
||||
float thres = 5.f;//different from zero to take into account only data large enough
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for reduction(+:variP,variN,countP,countN) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
for(int i=0;i<datalen;i++) {
|
||||
@@ -1087,7 +1089,7 @@ float *ImProcFunctions::ContrastDR(float *Source, int skip, struct cont_params c
|
||||
int n=W_L*H_L;
|
||||
if(Contrast == NULL) Contrast = new float[n];
|
||||
memcpy(Contrast, Source, n*sizeof(float));
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {//contrast
|
||||
@@ -1122,12 +1124,12 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int skip, struct c
|
||||
int n=W_L*H_L;
|
||||
|
||||
#ifdef __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
__m128 epsv = _mm_set1_ps( eps );
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int ii = 0; ii < n-3; ii+=4)
|
||||
@@ -1137,7 +1139,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int skip, struct c
|
||||
Source[ii] = xlogf(Source[ii] + eps);
|
||||
|
||||
#else
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(int ii = 0; ii < n; ii++)
|
||||
@@ -1168,7 +1170,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int skip, struct c
|
||||
|
||||
|
||||
#ifdef __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
@@ -1176,7 +1178,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int skip, struct c
|
||||
__m128 epsv = _mm_set1_ps( eps );
|
||||
__m128 DetailBoostv = _mm_set1_ps( DetailBoost );
|
||||
__m128 tempv = _mm_set1_ps( temp );
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int i = 0; i < n-3; i+=4){
|
||||
@@ -1195,7 +1197,7 @@ SSEFUNCTION float *ImProcFunctions::CompressDR(float *Source, int skip, struct c
|
||||
}
|
||||
|
||||
#else
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(int i = 0; i < n; i++){
|
||||
@@ -1218,7 +1220,9 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, unsigned int Iterates
|
||||
cp.TMmeth=2;//default after testing
|
||||
if(cp.TMmeth ==1) {min0 = 0.0f;max0=32768.f;}
|
||||
else if (cp.TMmeth ==2) {min0 = 0.0f;}
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(int i = 0; i < W_L*H_L; i++)
|
||||
{ WavCoeffs_L0[i]= (WavCoeffs_L0[i] - min0)/max0;
|
||||
WavCoeffs_L0[i]*=gamm;
|
||||
@@ -1232,9 +1236,9 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, unsigned int Iterates
|
||||
CompressDR(WavCoeffs_L0, skip, cp, W_L, H_L, Compression,DetailBoost,max0, min0, ave, ah, bh, al, bl, factorx, WavCoeffs_L0);
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||
#endif
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||
#endif
|
||||
for(int ii = 0; ii < W_L*H_L; ii++)
|
||||
WavCoeffs_L0[ii] = WavCoeffs_L0[ii]*max0*(1.f/gamm) + min0;
|
||||
}
|
||||
@@ -1255,7 +1259,9 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, unsigned int Iterates
|
||||
if(cp.TMmeth ==1) {min0 = 0.0f;max0=32768.f;}
|
||||
else if (cp.TMmeth ==2) {min0 = 0.0f;}
|
||||
// max0=32768.f;
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
for(int i = 0; i < W_L*H_L; i++)
|
||||
{ WavCoeffs_L0[i]= (WavCoeffs_L0[i] - min0)/max0;
|
||||
WavCoeffs_L0[i]*=gamm;
|
||||
@@ -1272,9 +1278,9 @@ void ImProcFunctions::ContrastResid(float * WavCoeffs_L0, unsigned int Iterates
|
||||
epd.CompressDynamicRange(WavCoeffs_L0, sca/float(skip), edgest, Compression, DetailBoost, Iterates, rew, WavCoeffs_L0);
|
||||
|
||||
//Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping.
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||
#endif
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for // removed schedule(dynamic,10)
|
||||
#endif
|
||||
for(int ii = 0; ii < W_L*H_L; ii++)
|
||||
WavCoeffs_L0[ii] = WavCoeffs_L0[ii]*max0*(1.f/gamm) + min0;
|
||||
}
|
||||
@@ -1288,7 +1294,7 @@ void ImProcFunctions::WaveletcontAllLfinal(LabImage * labco, float ** varhue, fl
|
||||
float * WavCoeffs_L0 = WaveletCoeffs_L.coeff0;
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int dir=1; dir<4; dir++) {
|
||||
@@ -1326,24 +1332,31 @@ void ImProcFunctions::WaveletcontAllLfinal(LabImage * labco, float ** varhue, fl
|
||||
float min0 = FLT_MAX;
|
||||
|
||||
if(contrast != 0.f || cp.tonemap) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel for reduction(+:avedbl) num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {
|
||||
avedbl += WavCoeffs_L0[i];
|
||||
}
|
||||
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
float lminL = FLT_MAX;
|
||||
float lmaxL = 0.f;
|
||||
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for(int i = 0; i < W_L*H_L; i++) {
|
||||
if(WavCoeffs_L0[i] < lminL) lminL = WavCoeffs_L0[i];
|
||||
if(WavCoeffs_L0[i] > lmaxL) lmaxL = WavCoeffs_L0[i];
|
||||
|
||||
}
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{ if(lminL < min0) min0 = lminL;
|
||||
if(lmaxL > max0) max0 = lmaxL;
|
||||
}
|
||||
@@ -1389,13 +1402,13 @@ if(cp.tonemap && cp.contmet==2) {
|
||||
for (int i=0; i<W_L*H_L; i++)
|
||||
koeLi[j][i]=0.f;
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
if(contrast != 0.f) { // contrast = 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||
{
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {//contrast
|
||||
@@ -1421,14 +1434,17 @@ if(cp.tonemap && cp.contmet==2) {
|
||||
if(cp.tonemap && cp.contmet==1) {
|
||||
float maxp=max0*256.f;
|
||||
float minp=min0*256.f;
|
||||
#pragma omp single
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp single
|
||||
#endif
|
||||
ContrastResid(WavCoeffs_L0, 5, skip, cp, W_L, H_L, maxp, minp, ave, ah, bh, al, bl, factorx );
|
||||
}
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp barrier
|
||||
|
||||
#endif
|
||||
|
||||
if(cp.conres != 0.f || cp.conresH != 0.f) { // cp.conres == 0.f and cp.conresH == 0.f means that all will be multiplied by 1.f, so we can skip this step
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {
|
||||
@@ -1481,7 +1497,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
for (int i=0; i<H_L; i++){
|
||||
tmC[i] = &tmCBuffer[i*W_L];
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int lvl=0; lvl<3; lvl++) {
|
||||
@@ -1499,7 +1515,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
float aamp=1.f+cp.eddetthrHi/100.f;
|
||||
|
||||
for (int lvl=0; lvl<3; lvl++) {
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic,16)
|
||||
#endif
|
||||
for (int i=1; i<H_L-1; i++) {
|
||||
@@ -1561,7 +1577,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
// end
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int dir=1; dir<4; dir++) {
|
||||
@@ -1593,7 +1609,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
|
||||
float * WavCoeffs_a0 = WaveletCoeffs_a.coeff0;
|
||||
float * WavCoeffs_b0 = WaveletCoeffs_b.coeff0;
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
@@ -1601,7 +1617,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
float huebuffer[W_L] ALIGNED64;
|
||||
float chrbuffer[W_L] ALIGNED64;
|
||||
#endif // __SSE2__
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic,16)
|
||||
#endif
|
||||
for (int i=0; i<H_L; i++) {
|
||||
@@ -1655,13 +1671,13 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
|
||||
float * WavCoeffs_ab0 = WaveletCoeffs_ab.coeff0;
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp parallel num_threads(wavNestedLevels) if(wavNestedLevels>1)
|
||||
#endif
|
||||
{
|
||||
if(cp.chrores != 0.f) { // cp.chrores == 0.f means all will be multiplied by 1.f, so we can skip the processing of residual
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for nowait
|
||||
#endif
|
||||
for (int i=0; i<W_L*H_L; i++) {
|
||||
@@ -1707,7 +1723,7 @@ if(cp.detectedge && lipschitz==true) { //enabled Lipschitz control...more memory
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#ifdef _RT_NESTED_OPENMP
|
||||
#pragma omp for schedule(dynamic) collapse(2)
|
||||
#endif
|
||||
for (int dir=1; dir<4; dir++) {
|
||||
|
||||
@@ -69,4 +69,7 @@
|
||||
#define ALIGNED64
|
||||
#define ALIGNED16
|
||||
#endif
|
||||
#ifndef __clang__
|
||||
#define _RT_NESTED_OPENMP _OPENMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user