diff --git a/rtengine/CA_correct_RT.cc b/rtengine/CA_correct_RT.cc index 2fa589110..e27f894ee 100644 --- a/rtengine/CA_correct_RT.cc +++ b/rtengine/CA_correct_RT.cc @@ -151,7 +151,9 @@ float* RawImageSource::CA_correct_RT( blueFactor = new array2D((W + 1 - 2 * cb) / 2, (H + 1 - 2 * cb) / 2); oldraw = new array2D((W + 1- 2 * cb) / 2, H- 2 * cb); // copy raw values before ca correction +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = cb; i < H - cb; ++i) { for (int j = cb + (FC(i, 0) & 1); j < W - cb; j += 2) { (*oldraw)[i - cb][(j - cb) / 2] = rawData[i][j]; @@ -220,7 +222,9 @@ float* RawImageSource::CA_correct_RT( constexpr float eps = 1e-5f, eps2 = 1e-10f; //tolerance to avoid dividing by zero +#ifdef _OPENMP #pragma omp parallel +#endif { int progresscounter = 0; @@ -274,7 +278,9 @@ float* RawImageSource::CA_correct_RT( float blocksqavethr[2][2] = {}; float blockdenomthr[2][2] = {}; +#ifdef _OPENMP #pragma omp for collapse(2) schedule(dynamic) nowait +#endif for (int top = -border ; top < height; top += ts - border2) { for (int left = -border; left < width - (W & 1); left += ts - border2) { memset(bufferThr, 0, buffersize); @@ -647,7 +653,9 @@ float* RawImageSource::CA_correct_RT( progresscounter++; if (progresscounter % 8 == 0) { +#ifdef _OPENMP #pragma omp critical (cadetectpass1) +#endif { progress += 4.0 * SQR(ts - border2) / (iterations * height * width); progress = std::min(progress, 1.0); @@ -658,7 +666,9 @@ float* RawImageSource::CA_correct_RT( } } //end of diagnostic pass +#ifdef _OPENMP #pragma omp critical (cadetectpass2) +#endif { for (int dir = 0; dir < 2; dir++) { for (int c = 0; c < 2; c++) { @@ -668,9 +678,13 @@ float* RawImageSource::CA_correct_RT( } } } +#ifdef _OPENMP #pragma omp barrier +#endif +#ifdef _OPENMP #pragma omp single +#endif { for (int dir = 0; dir < 2; dir++) for (int c = 0; c < 2; c++) { @@ -806,7 +820,9 @@ float* RawImageSource::CA_correct_RT( float* grbdiff = (float (*)) (data + 2 * sizeof(float) * ts * ts + 3 * 64); // there is no overlap in buffer usage => share //green interpolated to optical sample points for R/B float* gshift = (float (*)) (data + 2 * sizeof(float) * ts * ts + sizeof(float) * ts * tsh + 4 * 64); // there is no overlap in buffer usage => share +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) +#endif for (int top = -border; top < height; top += ts - border2) { for (int left = -border; left < width - (W & 1); left += ts - border2) { memset(bufferThr, 0, buffersizePassTwo); @@ -1197,7 +1213,9 @@ float* RawImageSource::CA_correct_RT( progresscounter++; if (progresscounter % 8 == 0) +#ifdef _OPENMP #pragma omp critical (cacorrect) +#endif { progress += 4.0 * SQR(ts - border2) / (iterations * height * width); progress = std::min(progress, 1.0); @@ -1208,7 +1226,9 @@ float* RawImageSource::CA_correct_RT( } // copy temporary image matrix back to image matrix +#ifdef _OPENMP #pragma omp for +#endif for (int row = cb; row < height - cb; row++) { int col = cb + (FC(row, 0) & 1); @@ -1232,14 +1252,18 @@ float* RawImageSource::CA_correct_RT( // of red and blue channel and apply a gaussian blur to them. // Then we apply the resulting factors per pixel on the result of raw ca correction +#ifdef _OPENMP #pragma omp parallel +#endif { #ifdef __SSE2__ const vfloat onev = F2V(1.f); const vfloat twov = F2V(2.f); const vfloat zd5v = F2V(0.5f); #endif +#ifdef _OPENMP #pragma omp for +#endif for (int i = 0; i < H - 2 * cb; ++i) { const int firstCol = FC(i, 0) & 1; const int colour = FC(i, firstCol); @@ -1260,7 +1284,9 @@ float* RawImageSource::CA_correct_RT( } } +#ifdef _OPENMP #pragma omp single +#endif { if (H % 2) { // odd height => factors are not set in last row => use values of preceding row @@ -1287,7 +1313,9 @@ float* RawImageSource::CA_correct_RT( gaussianBlur(*blueFactor, *blueFactor, (W + 1 - 2 * cb) / 2, (H + 1 - 2 * cb) / 2, 30.0); // apply correction factors to avoid (reduce) colour shift +#ifdef _OPENMP #pragma omp for +#endif for (int i = 0; i < H - 2 * cb; ++i) { const int firstCol = FC(i, 0) & 1; const int colour = FC(i, firstCol); diff --git a/rtengine/EdgePreservingDecomposition.cc b/rtengine/EdgePreservingDecomposition.cc index f71e0043e..d946fe0f3 100644 --- a/rtengine/EdgePreservingDecomposition.cc +++ b/rtengine/EdgePreservingDecomposition.cc @@ -324,7 +324,9 @@ void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float* } #endif +#ifdef _OPENMP #pragma omp single +#endif { #ifdef __SSE2__ diff --git a/rtengine/FTblockDN.cc b/rtengine/FTblockDN.cc index 1a3b80036..2a1a9fcb4 100644 --- a/rtengine/FTblockDN.cc +++ b/rtengine/FTblockDN.cc @@ -1838,11 +1838,15 @@ BENCHFUN for (int iteration = 1; iteration <= dnparams.passes; ++iteration) { +#ifdef _OPENMP #pragma omp parallel +#endif { if (methmed < 2) { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 1; i < hei - 1; ++i) { if (methmed == 0) { @@ -1857,7 +1861,9 @@ BENCHFUN } } else { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 2; i < hei - 2; ++i) { if (methmed == 3) { @@ -1901,7 +1907,9 @@ BENCHFUN if (methmed < 2) { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 1; i < hei - 1; ++i) { if (methmed == 0) { @@ -1916,7 +1924,9 @@ BENCHFUN } } else { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 2; i < hei - 2; ++i) { if (methmed == 3) { @@ -1961,7 +1971,9 @@ BENCHFUN if (methmed < 2) { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 1; i < hei - 1; ++i) { if (methmed == 0) { @@ -1976,7 +1988,9 @@ BENCHFUN } } else { +#ifdef _OPENMP #pragma omp for +#endif for (int i = 2; i < hei - 2; ++i) { if (methmed == 3) { diff --git a/rtengine/bilateral2.h b/rtengine/bilateral2.h index 3356843b2..e754ac891 100644 --- a/rtengine/bilateral2.h +++ b/rtengine/bilateral2.h @@ -152,10 +152,14 @@ template void bilateral05 (T** src, T** dst, T** buffer, int W { BL_BEGIN(318, 1) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER3(1, 7, 7, 55) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(1) } @@ -164,10 +168,14 @@ template void bilateral06 (T** src, T** dst, T** buffer, int W { BL_BEGIN(768, 1) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER3(1, 4, 4, 16) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(1) } @@ -176,10 +184,14 @@ template void bilateral07 (T** src, T** dst, T** buffer, int W { BL_BEGIN(366, 2) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER5(0, 0, 1, 0, 8, 21, 1, 21, 59) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(2) } @@ -188,10 +200,14 @@ template void bilateral08 (T** src, T** dst, T** buffer, int W { BL_BEGIN(753, 2) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER5(0, 0, 1, 0, 5, 10, 1, 10, 23) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(2) } @@ -200,10 +216,14 @@ template void bilateral09 (T** src, T** dst, T** buffer, int W { BL_BEGIN(595, 2) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER5(0, 1, 2, 1, 6, 12, 2, 12, 22) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(2) } @@ -212,10 +232,14 @@ template void bilateral10 (T** src, T** dst, T** buffer, int W { BL_BEGIN(910, 2) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER5(0, 1, 2, 1, 4, 7, 2, 7, 12) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(2) } @@ -224,10 +248,14 @@ template void bilateral11 (T** src, T** dst, T** buffer, int W { BL_BEGIN(209, 3) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER7(0, 0, 1, 1, 0, 2, 5, 8, 1, 5, 18, 27, 1, 8, 27, 41) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(3) } @@ -236,10 +264,14 @@ template void bilateral12 (T** src, T** dst, T** buffer, int W { BL_BEGIN(322, 3) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER7(0, 0, 1, 1, 0, 1, 4, 6, 1, 4, 11, 16, 1, 6, 16, 23) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(3) } @@ -248,10 +280,14 @@ template void bilateral13 (T** src, T** dst, T** buffer, int W { BL_BEGIN(336, 3) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER7(0, 0, 1, 1, 0, 2, 4, 6, 1, 4, 11, 14, 1, 6, 14, 19) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(3) } @@ -260,10 +296,14 @@ template void bilateral14 (T** src, T** dst, T** buffer, int W { BL_BEGIN(195, 3) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER7(0, 1, 2, 3, 1, 4, 8, 10, 2, 8, 17, 21, 3, 10, 21, 28) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(3) } @@ -272,10 +312,14 @@ template void bilateral15 (T** src, T** dst, T** buffer, int W { BL_BEGIN(132, 4) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER9(0, 0, 0, 1, 1, 0, 1, 2, 4, 5, 0, 2, 6, 12, 14, 1, 4, 12, 22, 28, 1, 5, 14, 28, 35) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(4) } @@ -284,10 +328,14 @@ template void bilateral16 (T** src, T** dst, T** buffer, int W { BL_BEGIN(180, 4) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER9(0, 0, 0, 1, 1, 0, 1, 2, 3, 4, 0, 2, 5, 9, 10, 1, 3, 9, 15, 19, 1, 4, 10, 19, 23) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(4) } @@ -296,10 +344,14 @@ template void bilateral17 (T** src, T** dst, T** buffer, int W { BL_BEGIN(195, 4) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER9(0, 0, 1, 1, 1, 0, 1, 2, 3, 4, 1, 2, 5, 8, 9, 1, 3, 8, 13, 16, 1, 4, 9, 16, 19) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(4) } @@ -308,10 +360,14 @@ template void bilateral18 (T** src, T** dst, T** buffer, int W { BL_BEGIN(151, 4) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER9(0, 0, 1, 2, 2, 0, 1, 3, 5, 5, 1, 3, 6, 10, 12, 2, 5, 10, 16, 19, 2, 5, 12, 19, 22) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(4) } @@ -320,10 +376,14 @@ template void bilateral19 (T** src, T** dst, T** buffer, int W { BL_BEGIN(151, 4) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER9(0, 0, 1, 2, 2, 0, 1, 3, 4, 5, 1, 3, 5, 8, 9, 2, 4, 8, 12, 14, 2, 5, 9, 14, 16) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(4) } @@ -332,10 +392,14 @@ template void bilateral20 (T** src, T** dst, T** buffer, int W { BL_BEGIN(116, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 0, 1, 1, 1, 0, 0, 1, 2, 3, 3, 0, 1, 2, 4, 7, 7, 1, 2, 4, 8, 12, 14, 1, 3, 7, 12, 18, 20, 1, 3, 7, 14, 20, 23) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } @@ -344,10 +408,14 @@ template void bilateral21 (T** src, T** dst, T** buffer, int W { BL_BEGIN(127, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 0, 1, 1, 1, 0, 0, 1, 2, 3, 3, 0, 1, 2, 4, 6, 7, 1, 2, 4, 8, 11, 12, 1, 3, 6, 11, 15, 17, 1, 3, 7, 12, 17, 19) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } @@ -356,10 +424,14 @@ template void bilateral22 (T** src, T** dst, T** buffer, int W { BL_BEGIN(109, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 0, 1, 1, 2, 0, 1, 2, 3, 3, 4, 1, 2, 3, 5, 7, 8, 1, 3, 5, 9, 12, 13, 1, 3, 7, 12, 16, 18, 2, 4, 8, 13, 18, 20) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } @@ -368,10 +440,14 @@ template void bilateral23 (T** src, T** dst, T** buffer, int W { BL_BEGIN(132, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 3, 5, 6, 7, 1, 2, 5, 7, 10, 11, 1, 3, 6, 10, 13, 14, 1, 3, 7, 11, 14, 16) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } @@ -380,10 +456,14 @@ template void bilateral24 (T** src, T** dst, T** buffer, int W { BL_BEGIN(156, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 3, 4, 5, 6, 1, 2, 4, 6, 8, 9, 1, 3, 5, 8, 10, 11, 1, 3, 6, 9, 11, 12) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } @@ -392,10 +472,14 @@ template void bilateral25 (T** src, T** dst, T** buffer, int W { BL_BEGIN(173, 5) +#ifdef _OPENMP #pragma omp for +#endif BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 2, 4, 5, 5, 1, 2, 4, 5, 7, 7, 1, 3, 5, 7, 9, 9, 1, 3, 5, 7, 9, 10) BL_FREE +#ifdef _OPENMP #pragma omp for +#endif BL_END(5) } diff --git a/rtengine/cfa_linedn_RT.cc b/rtengine/cfa_linedn_RT.cc index 2bb3649b6..f5563e3de 100644 --- a/rtengine/cfa_linedn_RT.cc +++ b/rtengine/cfa_linedn_RT.cc @@ -64,7 +64,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica float noisevarm4 = 4.0f * noisevar; volatile double progress = 0.0; float* RawDataTmp = (float*)malloc( width * height * sizeof(float)); +#ifdef _OPENMP #pragma omp parallel +#endif { // allocate memory and assure the arrays don't have same 64 byte boundary to avoid L1 conflict misses @@ -76,7 +78,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica float linehvar[4], linevvar[4], noisefactor[4][8][2], coeffsq; float dctblock[4][8][8]; +#ifdef _OPENMP #pragma omp for +#endif for(int i = 0; i < height; i++) for(int j = 0; j < width; j++) { @@ -84,7 +88,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica } // Main algorithm: Tile loop +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) +#endif for (int top = 0; top < height - 16; top += TS - 32) for (int left = 0; left < width - 16; left += TS - 32) { @@ -251,7 +257,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica free(cfain); // copy temporary buffer back to image matrix +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) +#endif for(int i = 0; i < height; i++) { float f = rowblender(i); diff --git a/rtengine/dcraw.cc b/rtengine/dcraw.cc index 660d65385..af5d38c6c 100644 --- a/rtengine/dcraw.cc +++ b/rtengine/dcraw.cc @@ -1695,7 +1695,9 @@ void CLASS phase_one_correct() curve[i] = LIM(num+i,0,65535); } apply: /* apply to whole image */ +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) +#endif for (int row=0; row < raw_height; row++) { for (int col = (tag & 1)*ph1.split_col; col < raw_width; col++) { RAW(row,col) = curve[RAW(row,col)]; @@ -1770,8 +1772,10 @@ void CLASS phase_one_correct() cx[17] = cf[17] = ((unsigned) ref[15] * 65535) / lc[qr][qc][15]; cx[18] = cf[18] = 65535; cubic_spline(cx, cf, 19); +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) - for (int row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++) +#endif + for (int row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++) for (int col = (qc ? ph1.split_col : 0); col < (qc ? raw_width : ph1.split_col); col++) RAW(row,col) = curve[RAW(row,col)]; } @@ -1787,7 +1791,9 @@ void CLASS phase_one_correct() qmult[1][0] = 1.0 + getreal(11); get4(); get4(); get4(); qmult[1][1] = 1.0 + getreal(11); +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) +#endif for (int row=0; row < raw_height; row++) { for (int col=0; col < raw_width; col++) { int i = qmult[row >= ph1.split_row][col >= ph1.split_col] * RAW(row,col); @@ -2329,7 +2335,9 @@ void CLASS hasselblad_correct() } // apply flatfield +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < raw_height; row++) { int ffs, cur_ffr, i, c; if (row < row_offset) { @@ -4470,7 +4478,9 @@ void CLASS crop_masked_pixels() } } } else { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row=0; row < height; row++) for (int col=0; col < width; col++) BAYER2(row,col) = RAW(row+top_margin,col+left_margin); diff --git a/rtengine/demosaic_algos.cc b/rtengine/demosaic_algos.cc index a4d896eec..617feb3b4 100644 --- a/rtengine/demosaic_algos.cc +++ b/rtengine/demosaic_algos.cc @@ -1528,7 +1528,7 @@ void RawImageSource::igv_interpolate(int winw, int winh) const int v1 = 1 * width, v2 = 2 * width, v3 = 3 * width, v4 = 4 * width, v5 = 5 * width, v6 = 6 * width; float* rgb[3]; float* chr[2]; - float (*rgbarray), *vdif, *hdif, (*chrarray); + float *rgbarray, *vdif, *hdif, *chrarray; rgbarray = (float (*)) calloc(width * height * 3, sizeof( float)); rgb[0] = rgbarray; @@ -1789,7 +1789,9 @@ void RawImageSource::nodemosaic(bool bw) red(W, H); green(W, H); blue(W, H); +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < H; i++) { for (int j = 0; j < W; j++) { diff --git a/rtengine/dfmanager.cc b/rtengine/dfmanager.cc index 96b8ddc59..5f1035a8e 100644 --- a/rtengine/dfmanager.cc +++ b/rtengine/dfmanager.cc @@ -219,10 +219,14 @@ void dfInfo::updateBadPixelList( RawImage *df ) if( df->getSensorType() == ST_BAYER || df->getSensorType() == ST_FUJI_XTRANS ) { std::vector badPixelsTemp; +#ifdef _OPENMP #pragma omp parallel +#endif { std::vector badPixelsThread; +#ifdef _OPENMP #pragma omp for nowait +#endif for( int row = 2; row < df->get_height() - 2; row++) for( int col = 2; col < df->get_width() - 2; col++) { @@ -235,7 +239,9 @@ void dfInfo::updateBadPixelList( RawImage *df ) } } +#ifdef _OPENMP #pragma omp critical +#endif badPixelsTemp.insert(badPixelsTemp.end(), badPixelsThread.begin(), badPixelsThread.end()); } badPixels.insert(badPixels.end(), badPixelsTemp.begin(), badPixelsTemp.end()); diff --git a/rtengine/dirpyr_equalizer.cc b/rtengine/dirpyr_equalizer.cc index 5e33fd3b3..4a30b48a8 100644 --- a/rtengine/dirpyr_equalizer.cc +++ b/rtengine/dirpyr_equalizer.cc @@ -143,7 +143,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt } #ifdef __SSE2__ +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < srcheight; i++) { int j; @@ -158,7 +160,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt } #else +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < srcheight; i++) { for(int j = 0; j < srcwidth; j++) { @@ -174,10 +178,14 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt } #ifdef __SSE2__ +#ifdef _OPENMP #pragma omp parallel +#endif { __m128 div = _mm_set1_ps(327.68f); +#ifdef _OPENMP #pragma omp for +#endif for(int i = 0; i < srcheight; i++) { int j; @@ -192,7 +200,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt } } #else +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < srcheight; i++) { for(int j = 0; j < srcwidth; j++) { @@ -228,7 +238,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt delete [] tmpHue; } +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < srcheight; i++) for (int j = 0; j < srcwidth; j++) { diff --git a/rtengine/dual_demosaic_RT.cc b/rtengine/dual_demosaic_RT.cc index 4ffac9296..790275d7e 100644 --- a/rtengine/dual_demosaic_RT.cc +++ b/rtengine/dual_demosaic_RT.cc @@ -91,9 +91,13 @@ void RawImageSource::dual_demosaic_RT(bool isBayer, const RAWParams &raw, int wi { 0.019334, 0.119193, 0.950227 } }; +#ifdef _OPENMP #pragma omp parallel +#endif { +#ifdef _OPENMP #pragma omp for +#endif for(int i = 0; i < winh; ++i) { Color::RGB2L(red[i], green[i], blue[i], L[i], xyz_rgb, winw); } @@ -106,19 +110,25 @@ void RawImageSource::dual_demosaic_RT(bool isBayer, const RAWParams &raw, int wi contrast = contrastf * 100.f; // the following is split into 3 loops intentionally to avoid cache conflicts on CPUs with only 4-way cache +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < winh; ++i) { for(int j = 0; j < winw; ++j) { red[i][j] = intp(blend[i][j], red[i][j], redTmp[i][j]); } } +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < winh; ++i) { for(int j = 0; j < winw; ++j) { green[i][j] = intp(blend[i][j], green[i][j], greenTmp[i][j]); } } +#ifdef _OPENMP #pragma omp parallel for +#endif for(int i = 0; i < winh; ++i) { for(int j = 0; j < winw; ++j) { blue[i][j] = intp(blend[i][j], blue[i][j], blueTmp[i][j]); diff --git a/rtengine/eahd_demosaic.cc b/rtengine/eahd_demosaic.cc index 816f4bf55..18883586c 100644 --- a/rtengine/eahd_demosaic.cc +++ b/rtengine/eahd_demosaic.cc @@ -431,7 +431,9 @@ void RawImageSource::eahd_demosaic () } // Interpolate R and B +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < H; i++) { if (i == 0) { interpolate_row_rb_mul_pp (rawData, red[i], blue[i], nullptr, green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); @@ -443,4 +445,4 @@ void RawImageSource::eahd_demosaic () } } -} \ No newline at end of file +} diff --git a/rtengine/expo_before_b.cc b/rtengine/expo_before_b.cc index eda8c9e37..cd5e77df5 100644 --- a/rtengine/expo_before_b.cc +++ b/rtengine/expo_before_b.cc @@ -63,13 +63,17 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2DgetSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS) +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { rawData[row][col] *= expos; } else +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { rawData[row][col * 3] *= expos; @@ -88,12 +92,16 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2DgetSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS) +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) nowait +#endif for(int row = 0; row < height; row++) for (int col = 0; col < width; col++) { if (rawData[row][col] > maxValFloatThr) { @@ -101,7 +109,9 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D maxValFloat) { maxValFloat = maxValFloatThr; @@ -141,14 +153,18 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2DgetSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS) +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) +#endif for(int row = 0; row < height; row++) for(int col = 0; col < width; col++) { float lumi = 0.299f * red[row][col] + 0.587f * green[row][col] + 0.114f * blue[row][col]; rawData[row][col] *= lumi < K ? expos : lut[lumi]; } else +#ifdef _OPENMP #pragma omp parallel for +#endif for(int row = 0; row < height; row++) for(int col = 0; col < width; col++) { float lumi = 0.299f * rawData[row][col * 3] + 0.587f * rawData[row][col * 3 + 1] + 0.114f * rawData[row][col * 3 + 2]; diff --git a/rtengine/fast_demo.cc b/rtengine/fast_demo.cc index e88661485..1584c4ca2 100644 --- a/rtengine/fast_demo.cc +++ b/rtengine/fast_demo.cc @@ -77,10 +77,10 @@ void RawImageSource::fast_demosaic() #endif { - char (*buffer); - float (*greentile); - float (*redtile); - float (*bluetile); + char *buffer; + float *greentile; + float *redtile; + float *bluetile; #define CLF 1 // assign working space buffer = (char *) calloc(3 * sizeof(float) * TS * TS + 3 * CLF * 64 + 63, 1); diff --git a/rtengine/hilite_recon.cc b/rtengine/hilite_recon.cc index b8ad23e70..bb173f11a 100644 --- a/rtengine/hilite_recon.cc +++ b/rtengine/hilite_recon.cc @@ -336,7 +336,9 @@ void RawImageSource::boxblur_resamp(float **src, float **dst, float ** temp, int } // process remaining columns +#ifdef _OPENMP #pragma omp single +#endif { //vertical blur diff --git a/rtengine/hphd_demosaic_RT.cc b/rtengine/hphd_demosaic_RT.cc index fd2e5cce1..5c15469ca 100644 --- a/rtengine/hphd_demosaic_RT.cc +++ b/rtengine/hphd_demosaic_RT.cc @@ -52,7 +52,9 @@ void hphd_vertical(const array2D &rawData, float** hpmap, int col_from, i #endif for (; k < col_to - 7; k += numCols) { for (int i = 5; i < H - 5; i++) { +#ifdef _OPENMP #pragma omp simd +#endif for(int h = 0; h < numCols; ++h) { temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h]) - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i + 1][k + h])); } @@ -68,7 +70,9 @@ void hphd_vertical(const array2D &rawData, float** hpmap, int col_from, i STVFU(avg[j][4], avgL2); STVFU(dev[j][4], vmaxf(epsv, (SQRV(LVFU(temp[j - 4][4]) - avgL2) + SQRV(LVFU(temp[j - 3][4]) - avgL2)) + (SQRV(LVFU(temp[j - 2][4]) - avgL2) + SQRV(LVFU(temp[j - 1][4]) - avgL2)) + (SQRV(LVFU(temp[j][4]) - avgL2) + SQRV(LVFU(temp[j + 1][4]) - avgL2)) + (SQRV(LVFU(temp[j + 2][4]) - avgL2) + SQRV(LVFU(temp[j + 3][4]) - avgL2)) + SQRV(LVFU(temp[j + 4][4]) - avgL2))); #else +#ifdef _OPENMP #pragma omp simd +#endif for(int h = 0; h < numCols; ++h) { const float avgL = ((temp[j - 4][h] + temp[j - 3][h]) + (temp[j - 2][h] + temp[j - 1][h]) + (temp[j][h] + temp[j + 1][h]) + (temp[j + 2][h] + temp[j + 3][h]) + temp[j + 4][h]) / 9.f; avg[j][h] = avgL; @@ -78,7 +82,9 @@ void hphd_vertical(const array2D &rawData, float** hpmap, int col_from, i } for (int j = 5; j < H - 5; j++) { +#ifdef _OPENMP #pragma omp simd +#endif for(int h = 0; h < numCols; ++h) { const float avgL = avg[j - 1][h]; const float avgR = avg[j + 1][h]; @@ -126,12 +132,16 @@ void hphd_horizontal(const array2D &rawData, float** hpmap, int row_from, const vfloat zd8v = F2V(0.8f); #endif for (int i = row_from; i < row_to; i++) { +#ifdef _OPENMP #pragma omp simd +#endif for (int j = 5; j < W - 5; j++) { temp[j] = std::fabs((rawData[i][j - 5] - rawData[i][j + 5]) - 8 * (rawData[i][j - 4] - rawData[i][j + 4]) + 27 * (rawData[i][j - 3] - rawData[i][j + 3]) - 48 * (rawData[i][j - 2] - rawData[i][j + 2]) + 42 * (rawData[i][j - 1] - rawData[i][j + 1])); } +#ifdef _OPENMP #pragma omp simd +#endif for (int j = 4; j < W - 4; j++) { const float avgL = ((temp[j - 4] + temp[j - 3]) + (temp[j - 2] + temp[j - 1]) + (temp[j] + temp[j + 1]) + (temp[j + 2] + temp[j + 3]) + temp[j + 4]) / 9.f; avg[j] = avgL; @@ -301,7 +311,7 @@ void RawImageSource::hphd_demosaic () } } #else - hphd_vertical(hpmap, 0, W, H); + hphd_vertical(rawData, hpmap, 0, W, H); #endif if (plistener) { @@ -322,7 +332,7 @@ void RawImageSource::hphd_demosaic () } } #else - hphd_horizontal(hpmap, 0, H); + hphd_horizontal(rawData, hpmap, 0, H, W); #endif if (plistener) { @@ -335,7 +345,9 @@ void RawImageSource::hphd_demosaic () plistener->setProgress(0.65); } +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 4; i < H - 4; i++) { interpolate_row_rb_mul_pp(rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1); } diff --git a/rtengine/imagefloat.cc b/rtengine/imagefloat.cc index 0d978aa5b..34ec06492 100644 --- a/rtengine/imagefloat.cc +++ b/rtengine/imagefloat.cc @@ -108,8 +108,6 @@ void Imagefloat::setScanline (int row, unsigned char* buffer, int bps, unsigned } -namespace rtengine { extern void filmlike_clip(float *r, float *g, float *b); } - void Imagefloat::getScanline (int row, unsigned char* buffer, int bps, bool isFloat) const { @@ -441,11 +439,15 @@ void Imagefloat::calcCroppedHistogram(const ProcParams ¶ms, float scale, LUT int x1, x2, y1, y2; params.crop.mapToResized(width, height, scale, x1, x2, y1, y2); +#ifdef _OPENMP #pragma omp parallel +#endif { LUTu histThr(65536); histThr.clear(); +#ifdef _OPENMP #pragma omp for nowait +#endif for (int y = y1; y < y2; y++) { for (int x = x1; x < x2; x++) { @@ -461,7 +463,9 @@ void Imagefloat::calcCroppedHistogram(const ProcParams ¶ms, float scale, LUT } } +#ifdef _OPENMP #pragma omp critical +#endif { for(int i = 0; i <= 0xffff; i++) { hist[i] += histThr[i]; diff --git a/rtengine/improccoordinator.cc b/rtengine/improccoordinator.cc index 24107ea1f..12ef0c226 100644 --- a/rtengine/improccoordinator.cc +++ b/rtengine/improccoordinator.cc @@ -1048,9 +1048,13 @@ void ImProcCoordinator::updateLRGBHistograms() int x1, y1, x2, y2; params.crop.mapToResized(pW, pH, scale, x1, x2, y1, y2); +#ifdef _OPENMP #pragma omp parallel sections +#endif { +#ifdef _OPENMP #pragma omp section +#endif { histChroma.clear(); @@ -1060,7 +1064,9 @@ void ImProcCoordinator::updateLRGBHistograms() histChroma[(int)(sqrtf(SQR(nprevl->a[i][j]) + SQR(nprevl->b[i][j])) / 188.f)]++; //188 = 48000/256 } } +#ifdef _OPENMP #pragma omp section +#endif { histLuma.clear(); @@ -1070,7 +1076,9 @@ void ImProcCoordinator::updateLRGBHistograms() histLuma[(int)(nprevl->L[i][j] / 128.f)]++; } } +#ifdef _OPENMP #pragma omp section +#endif { histRed.clear(); histGreen.clear(); @@ -1302,7 +1310,9 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a int cy = params.crop.y; int cw = params.crop.w; int ch = params.crop.h; +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = cy; i < cy + ch; i++) { for (int j = cx; j < cx + cw; j++) { @@ -1317,7 +1327,9 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a } // image may contain out of range samples, clip them to avoid wrap-arounds +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < im->getHeight(); i++) { for (int j = 0; j < im->getWidth(); j++) { diff --git a/rtengine/improcfun.cc b/rtengine/improcfun.cc index 404229d10..fb8093de9 100644 --- a/rtengine/improcfun.cc +++ b/rtengine/improcfun.cc @@ -777,7 +777,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw hist16Qthr.clear(); } +#ifdef _OPENMP #pragma omp for reduction(+:sum) +#endif for (int i = 0; i < height; i++) @@ -846,7 +848,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw //can be used in case of... } +#ifdef _OPENMP #pragma omp critical +#endif { if (needJ) { hist16J += hist16Jthr; @@ -995,7 +999,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw int bufferLength = ((width + 3) / 4) * 4; // bufferLength has to be a multiple of 4 #endif #ifndef _DEBUG +#ifdef _OPENMP #pragma omp parallel +#endif #endif { float minQThr = 10000.f; @@ -1010,7 +1016,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw float sbuffer[bufferLength] ALIGNED16; #endif #ifndef _DEBUG +#ifdef _OPENMP #pragma omp for schedule(dynamic, 16) +#endif #endif for (int i = 0; i < height; i++) { @@ -1618,7 +1626,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw #endif } +#ifdef _OPENMP #pragma omp critical +#endif { if (minQThr < minQ) { minQ = minQThr; @@ -1731,11 +1741,15 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw #ifndef _DEBUG +#ifdef _OPENMP #pragma omp parallel +#endif #endif { #ifndef _DEBUG +#ifdef _OPENMP #pragma omp for schedule(dynamic, 10) +#endif #endif for (int i = 0; i < height; i++) // update CieImages with new values after sharpening, defringe, contrast by detail level @@ -1768,7 +1782,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw const float co_e = (pow_F (f_l, 0.25f)) + eps; #ifndef _DEBUG +#ifdef _OPENMP #pragma omp parallel +#endif #endif { #ifdef __SSE2__ @@ -1782,7 +1798,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw #endif #ifndef _DEBUG +#ifdef _OPENMP #pragma omp for schedule(dynamic, 10) +#endif #endif for (int i = 0; i < height; i++) { // update CIECAM with new values after tone-mapping @@ -4120,7 +4138,9 @@ void ImProcFunctions::luminanceCurve (LabImage* lold, LabImage* lnew, LUTf & cur int W = lold->W; int H = lold->H; +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int i = 0; i < H; i++) for (int j = 0; j < W; j++) { @@ -4386,17 +4406,21 @@ void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW {wprof[2][0], wprof[2][1], wprof[2][2]} }; +#ifdef _OPENMP #ifdef _DEBUG #pragma omp parallel default(shared) firstprivate(lold, lnew, MunsDebugInfo, pW) if (multiThread) #else #pragma omp parallel if (multiThread) +#endif #endif { #ifdef __SSE2__ float HHBuffer[W] ALIGNED16; float CCBuffer[W] ALIGNED16; #endif +#ifdef _OPENMP #pragma omp for schedule(dynamic, 16) +#endif for (int i = 0; i < H; i++) { if (avoidColorShift) @@ -5136,7 +5160,9 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi EdgePreservingDecomposition epd (Wid, Hei); +#ifdef _OPENMP #pragma omp parallel for +#endif for (int i = 0; i < Hei; i++) for (int j = 0; j < Wid; j++) { @@ -5162,7 +5188,9 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi //Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping. float s = (1.0f + 38.7889f) * powf (Compression, 1.5856f) / (1.0f + 38.7889f * powf (Compression, 1.5856f)); #ifndef _DEBUG +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,10) +#endif #endif for (int i = 0; i < Hei; i++) @@ -5243,11 +5271,15 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip //Due to the taking of logarithms, L must be nonnegative. Further, scale to 0 to 1 using nominal range of L, 0 to 15 bit. float minL = FLT_MAX; float maxL = 0.f; +#ifdef _OPENMP #pragma omp parallel +#endif { float lminL = FLT_MAX; float lmaxL = 0.f; +#ifdef _OPENMP #pragma omp for +#endif for (size_t i = 0; i < N; i++) { if (L[i] < lminL) { @@ -5259,7 +5291,9 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip } } +#ifdef _OPENMP #pragma omp critical +#endif { if (lminL < minL) { minL = lminL; @@ -5279,7 +5313,9 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip maxL = 1.f; } +#ifdef _OPENMP #pragma omp parallel for +#endif for (size_t i = 0; i < N; ++i) //{L[i] = (L[i] - minL)/32767.0f; diff --git a/rtengine/ipresize.cc b/rtengine/ipresize.cc index 2d12417e1..a14115120 100644 --- a/rtengine/ipresize.cc +++ b/rtengine/ipresize.cc @@ -54,7 +54,9 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca const float sc = min (scale, 1.0f); const int support = static_cast (2.0f * a / sc) + 1; +#ifdef _OPENMP #pragma omp parallel +#endif { // storage for precomputed parameters for horisontal interpolation float * wwh = new float[support * dst->getWidth()]; @@ -97,7 +99,9 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca } // Phase 2: do actual interpolation +#ifdef _OPENMP #pragma omp for +#endif for (int i = 0; i < dst->getHeight(); i++) { diff --git a/rtengine/iptransform.cc b/rtengine/iptransform.cc index a5ade7c53..8c06252c1 100644 --- a/rtengine/iptransform.cc +++ b/rtengine/iptransform.cc @@ -705,7 +705,9 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat* } bool darkening = (params->vignetting.amount <= 0.0); +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic,16) if (multiThread) +#endif for (int y = 0; y < transformed->getHeight(); y++) { double vig_y_d = applyVignetting ? (double) (y + cy) - vig_h2 : 0.0; @@ -817,7 +819,9 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I #endif // main cycle bool darkening = (params->vignetting.amount <= 0.0); +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int y = 0; y < transformed->getHeight(); y++) { for (int x = 0; x < transformed->getWidth(); x++) { @@ -964,7 +968,9 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans chTrans[1] = transformed->g.ptrs; chTrans[2] = transformed->b.ptrs; +#ifdef _OPENMP #pragma omp parallel for if (multiThread) +#endif for (int y = 0; y < transformed->getHeight(); y++) { for (int x = 0; x < transformed->getWidth(); x++) { diff --git a/rtengine/ipvibrance.cc b/rtengine/ipvibrance.cc index 86c942731..5f178b5c0 100644 --- a/rtengine/ipvibrance.cc +++ b/rtengine/ipvibrance.cc @@ -40,8 +40,6 @@ namespace rtengine using namespace procparams; -extern const Settings* settings; - void fillCurveArrayVib (DiagonalCurve* diagCurve, LUTf &outCurve) { @@ -170,7 +168,9 @@ void ImProcFunctions::vibrance (LabImage* lab) {static_cast(wiprof[2][0]), static_cast(wiprof[2][1]), static_cast(wiprof[2][2])} }; +#ifdef _OPENMP #pragma omp parallel if (multiThread) +#endif { #ifdef __SSE2__ @@ -179,7 +179,9 @@ void ImProcFunctions::vibrance (LabImage* lab) #endif float sathue[5], sathue2[4]; // adjust sat in function of hue +#ifdef _OPENMP #pragma omp for schedule(dynamic, 16) +#endif for (int i = 0; i < height; i++) { #ifdef __SSE2__ diff --git a/rtengine/previewimage.cc b/rtengine/previewimage.cc index f180469ad..1538ae5fa 100644 --- a/rtengine/previewimage.cc +++ b/rtengine/previewimage.cc @@ -77,7 +77,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext previewImage = Cairo::ImageSurface::create(Cairo::FORMAT_RGB24, w, h); previewImage->flush(); +#ifdef _OPENMP #pragma omp parallel for +#endif for (unsigned int i = 0; i < (unsigned int)(h); ++i) { const unsigned char *src = data + i * w * 3; unsigned char *dst = previewImage->get_data() + i * w * 4; @@ -119,7 +121,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext rawImage.getImage (wb, TR_NONE, &image, pp, params.toneCurve, params.raw); rtengine::Image8 output(fw, fh); rawImage.convertColorSpace(&image, params.icm, wb); +#ifdef _OPENMP #pragma omp parallel for schedule(dynamic, 10) +#endif for (int i = 0; i < fh; ++i) for (int j = 0; j < fw; ++j) { image.r(i, j) = Color::gamma2curve[image.r(i, j)]; @@ -139,7 +143,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext previewImage = Cairo::ImageSurface::create(Cairo::FORMAT_RGB24, w, h); previewImage->flush(); - #pragma omp parallel for +#ifdef _OPENMP + #pragma omp parallel for +#endif for (unsigned int i = 0; i < (unsigned int)(h); i++) { const unsigned char *src = data + i * w * 3; unsigned char *dst = previewImage->get_data() + i * w * 4; diff --git a/rtengine/rawimage.cc b/rtengine/rawimage.cc index ced3fbb23..18848274a 100644 --- a/rtengine/rawimage.cc +++ b/rtengine/rawimage.cc @@ -127,7 +127,9 @@ void RawImage::get_colorsCoeff( float *pre_mul_, float *scale_mul_, float *cblac dsum[FC(1, 0) + 4] += (int)(((W + 1) / 2) * (H / 2)); dsum[FC(1, 1) + 4] += (int)((W / 2) * (H / 2)); +#ifdef _OPENMP #pragma omp parallel private(val) +#endif { double dsumthr[8]; memset(dsumthr, 0, sizeof dsumthr); @@ -142,7 +144,9 @@ void RawImage::get_colorsCoeff( float *pre_mul_, float *scale_mul_, float *cblac } float *tempdata = data[0]; +#ifdef _OPENMP #pragma omp for nowait +#endif for (size_t row = 0; row < H; row += 8) { size_t ymax = row + 8 < H ? row + 8 : H; @@ -176,7 +180,9 @@ skip_block2: } } +#ifdef _OPENMP #pragma omp critical +#endif { for (int c = 0; c < 4; c++) { dsum[c] += dsumthr[c]; @@ -194,7 +200,9 @@ skip_block2: } } else if(isXtrans()) { +#ifdef _OPENMP #pragma omp parallel +#endif { double dsumthr[8]; memset(dsumthr, 0, sizeof dsumthr); @@ -209,7 +217,9 @@ skip_block2: whitefloat[c] = this->get_white(c) - whiteThreshold; } +#ifdef _OPENMP #pragma omp for nowait +#endif for (size_t row = 0; row < H; row += 8) for (size_t col = 0; col < W ; col += 8) @@ -239,7 +249,9 @@ skip_block3: ; } +#ifdef _OPENMP #pragma omp critical +#endif { for (int c = 0; c < 8; c++) { @@ -716,7 +728,9 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage) // copy pixel raw data: the compressed format earns space if( float_raw_image ) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { @@ -726,21 +740,27 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage) delete [] float_raw_image; float_raw_image = nullptr; } else if (filters != 0 && !isXtrans()) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { this->data[row][col] = image[row * width + col][FC(row, col)]; } } else if (isXtrans()) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { this->data[row][col] = image[row * width + col][XTRANSFC(row, col)]; } } else if (colors == 1) { +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { @@ -751,7 +771,9 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage) height -= top_margin; width -= left_margin; } +#ifdef _OPENMP #pragma omp parallel for +#endif for (int row = 0; row < height; row++) for (int col = 0; col < width; col++) { diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index b59e8ecab..f551eb0dc 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -3407,7 +3407,9 @@ void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, int boxH, } +#ifdef _OPENMP #pragma omp single +#endif for (int col = W - (W % 8); col < W; col++) { int len = boxH / 2 + 1; diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index dd844ad5c..46f1a91c8 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -335,13 +335,17 @@ private: LUTf gamcurve (65536, 0); float gam, gamthresh, gamslope; ipf.RGB_denoise_infoGamCurve (params.dirpyrDenoise, imgsrc->isRAW(), gamcurve, gam, gamthresh, gamslope); +#ifdef _OPENMP #pragma omp parallel +#endif { Imagefloat *origCropPart;//init auto noise origCropPart = new Imagefloat (crW, crH);//allocate memory Imagefloat *provicalc = new Imagefloat ((crW + 1) / 2, (crH + 1) / 2); //for denoise curves int skipP = 1; +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) nowait +#endif for (int wcr = 0; wcr < numtiles_W; wcr++) { for (int hcr = 0; hcr < numtiles_H; hcr++) { @@ -557,13 +561,17 @@ private: coordH[0] = begH; coordH[1] = fh / 2 - crH / 2; coordH[2] = fh - crH - begH; +#ifdef _OPENMP #pragma omp parallel +#endif { Imagefloat *origCropPart;//init auto noise origCropPart = new Imagefloat (crW, crH);//allocate memory Imagefloat *provicalc = new Imagefloat ((crW + 1) / 2, (crH + 1) / 2); //for denoise curves +#ifdef _OPENMP #pragma omp for schedule(dynamic) collapse(2) nowait +#endif for (int wcr = 0; wcr <= 2; wcr++) { for (int hcr = 0; hcr <= 2; hcr++) { @@ -807,7 +815,9 @@ private: if (denoiseParams.enabled && (noiseLCurve || noiseCCurve )) { // we only need image reduced to 1/4 here calclum = new Imagefloat ((fw + 1) / 2, (fh + 1) / 2); //for luminance denoise curve +#ifdef _OPENMP #pragma omp parallel for +#endif for (int ii = 0; ii < fh; ii += 2) { for (int jj = 0; jj < fw; jj += 2) { @@ -1031,7 +1041,9 @@ private: hist16thr[ (int) ((labView->L[i][j]))]++; } +#ifdef _OPENMP #pragma omp critical +#endif { hist16 += hist16thr; } diff --git a/rtengine/tmo_fattal02.cc b/rtengine/tmo_fattal02.cc index 124cdbfb1..e4788f3f8 100644 --- a/rtengine/tmo_fattal02.cc +++ b/rtengine/tmo_fattal02.cc @@ -187,7 +187,9 @@ void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread) Array2Df T (width, height); //--- X blur +#ifdef _OPENMP #pragma omp parallel for shared(I, T) if(multithread) +#endif for ( int y = 0 ; y < height ; y++ ) { for ( int x = 1 ; x < width - 1 ; x++ ) { @@ -202,7 +204,9 @@ void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread) } //--- Y blur +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for ( int x = 0 ; x < width - 7 ; x += 8 ) { for ( int y = 1 ; y < height - 1 ; y++ ) { @@ -279,7 +283,9 @@ float calculateGradients (Array2Df* H, Array2Df* G, int k, bool multithread) const float divider = pow ( 2.0f, k + 1 ); double avgGrad = 0.0; // use double precision for large summations +#ifdef _OPENMP #pragma omp parallel for reduction(+:avgGrad) if(multithread) +#endif for ( int y = 0 ; y < height ; y++ ) { int n = (y == 0 ? 0 : y - 1); @@ -354,7 +360,9 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[], fi[nlevels - 1] = new Array2Df (width, height); +#ifdef _OPENMP #pragma omp parallel for shared(fi) if(multithread) +#endif for ( int k = 0 ; k < width * height ; k++ ) { (*fi[nlevels - 1]) (k) = 1.0f; } @@ -366,7 +374,9 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[], // only apply gradients to levels>=detail_level but at least to the coarsest if ((k >= detail_level || k == nlevels - 1) && beta != 1.f) { //DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl; +#ifdef _OPENMP #pragma omp parallel for shared(fi,avgGrad) if(multithread) +#endif for ( int y = 0; y < height; y++ ) { for ( int x = 0; x < width; x++ ) { float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y); @@ -455,7 +465,9 @@ void tmo_fattal02 (size_t width, // float minLum = Y (0, 0); float maxLum = Y (0, 0); +#ifdef _OPENMP #pragma omp parallel for reduction(max:maxLum) if(multithread) +#endif for ( int i = 0 ; i < size ; i++ ) { maxLum = std::max (maxLum, Y (i)); @@ -464,13 +476,17 @@ void tmo_fattal02 (size_t width, Array2Df* H = new Array2Df (width, height); float temp = 100.f / maxLum; float eps = 1e-4f; +#ifdef _OPENMP #pragma omp parallel if(multithread) +#endif { #ifdef __SSE2__ vfloat epsv = F2V (eps); vfloat tempv = F2V (temp); #endif +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) +#endif for (size_t i = 0 ; i < height ; ++i) { size_t j = 0; @@ -573,7 +589,9 @@ void tmo_fattal02 (size_t width, // boundary conditions, so we need to adjust the assembly of the right hand // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero // Neumann conditions assume U(-1)=U(0)), see also divergence calculation +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for ( size_t y = 0 ; y < height ; y++ ) { // sets index+1 based on the boundary assumption H(N+1)=H(N-1) @@ -591,7 +609,9 @@ void tmo_fattal02 (size_t width, delete H; // calculate divergence +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for ( size_t y = 0; y < height; ++y ) { for ( size_t x = 0; x < width; ++x ) { @@ -626,12 +646,16 @@ void tmo_fattal02 (size_t width, delete Gx; delete FI; +#ifdef _OPENMP #pragma omp parallel if(multithread) +#endif { #ifdef __SSE2__ vfloat gammav = F2V (gamma); #endif +#ifdef _OPENMP #pragma omp for schedule(dynamic,16) +#endif for (size_t i = 0 ; i < height ; i++) { size_t j = 0; @@ -706,7 +730,9 @@ void transform_ev2normal (Array2Df *A, Array2Df *T, bool multithread) // the discrete cosine transform is not exactly the transform needed // need to scale input values to get the right transformation +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for (int y = 1 ; y < height - 1 ; y++ ) for (int x = 1 ; x < width - 1 ; x++ ) { @@ -757,7 +783,9 @@ void transform_normal2ev (Array2Df *A, Array2Df *T, bool multithread) // need to scale the output matrix to get the right transform float factor = (1.0f / ((height - 1) * (width - 1))); +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for (int y = 0 ; y < height ; y++ ) for (int x = 0 ; x < width ; x++ ) { @@ -876,7 +904,9 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/* std::vector l1 = get_lambda (height); std::vector l2 = get_lambda (width); +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for (int y = 0 ; y < height ; y++ ) { for (int x = 0 ; x < width ; x++ ) { @@ -896,13 +926,17 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/* // (not really needed but good for numerics as we later take exp(U)) //DEBUG_STR << "solve_pde_fft: removing constant from solution" << std::endl; float max = 0.f; +#ifdef _OPENMP #pragma omp parallel for reduction(max:max) if(multithread) +#endif for (int i = 0; i < width * height; i++) { max = std::max (max, (*U) (i)); } +#ifdef _OPENMP #pragma omp parallel for if(multithread) +#endif for (int i = 0; i < width * height; i++) { (*U) (i) -= max; diff --git a/rtgui/cropwindow.cc b/rtgui/cropwindow.cc index 50d8f9543..345b3e583 100644 --- a/rtgui/cropwindow.cc +++ b/rtgui/cropwindow.cc @@ -1493,7 +1493,9 @@ void CropWindow::expose (Cairo::RefPtr cr) } } +#ifdef _OPENMP #pragma omp critical +#endif { if(maxthrstdDev_L2 > maxstdDev_L2) { maxstdDev_L2 = maxthrstdDev_L2;