Fix non-OpenMP build

2019-02-06 06:54:31 +01:00 · 2019-02-06 06:54:31 +01:00 · 37027e46bd
commit 37027e46bd
parent 9210a56ca4
27 changed files with 364 additions and 14 deletions
--- a/rtengine/CA_correct_RT.cc
+++ b/rtengine/CA_correct_RT.cc
@ -151,7 +151,9 @@ float* RawImageSource::CA_correct_RT(
        blueFactor = new array2D<float>((W + 1 - 2 * cb) / 2, (H + 1 - 2 * cb) / 2);
        oldraw = new array2D<float>((W + 1- 2 * cb) / 2, H- 2 * cb);
        // copy raw values before ca correction
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif
        for (int i = cb; i < H - cb; ++i) {
            for (int j = cb + (FC(i, 0) & 1); j < W - cb; j += 2) {
                (*oldraw)[i - cb][(j - cb) / 2] = rawData[i][j];
@ -220,7 +222,9 @@ float* RawImageSource::CA_correct_RT(

        constexpr float eps = 1e-5f, eps2 = 1e-10f; //tolerance to avoid dividing by zero

+#ifdef _OPENMP
        #pragma omp parallel
+#endif
        {
            int progresscounter = 0;

@ -274,7 +278,9 @@ float* RawImageSource::CA_correct_RT(
                float blocksqavethr[2][2] = {};
                float blockdenomthr[2][2] = {};

+#ifdef _OPENMP
                #pragma omp for collapse(2) schedule(dynamic) nowait
+#endif
                for (int top = -border ; top < height; top += ts - border2) {
                    for (int left = -border; left < width - (W & 1); left += ts - border2) {
                        memset(bufferThr, 0, buffersize);
@ -647,7 +653,9 @@ float* RawImageSource::CA_correct_RT(
                            progresscounter++;

                            if (progresscounter % 8 == 0) {
+#ifdef _OPENMP
                                #pragma omp critical (cadetectpass1)
+#endif
                                {
                                    progress += 4.0 * SQR(ts - border2) / (iterations * height * width);
                                    progress = std::min(progress, 1.0);
@ -658,7 +666,9 @@ float* RawImageSource::CA_correct_RT(
                    }
                }
                //end of diagnostic pass
+#ifdef _OPENMP
                #pragma omp critical (cadetectpass2)
+#endif
                {
                    for (int dir = 0; dir < 2; dir++) {
                        for (int c = 0; c < 2; c++) {
@ -668,9 +678,13 @@ float* RawImageSource::CA_correct_RT(
                        }
                    }
                }
+#ifdef _OPENMP
                #pragma omp barrier
+#endif

+#ifdef _OPENMP
                #pragma omp single
+#endif
                {
                    for (int dir = 0; dir < 2; dir++)
                        for (int c = 0; c < 2; c++) {
@ -806,7 +820,9 @@ float* RawImageSource::CA_correct_RT(
                float* grbdiff = (float (*)) (data + 2 * sizeof(float) * ts * ts + 3 * 64); // there is no overlap in buffer usage => share
                //green interpolated to optical sample points for R/B
                float* gshift  = (float (*)) (data + 2 * sizeof(float) * ts * ts + sizeof(float) * ts * tsh + 4 * 64); // there is no overlap in buffer usage => share
+#ifdef _OPENMP
                #pragma omp for schedule(dynamic) collapse(2)
+#endif
                for (int top = -border; top < height; top += ts - border2) {
                    for (int left = -border; left < width - (W & 1); left += ts - border2) {
                        memset(bufferThr, 0, buffersizePassTwo);
@ -1197,7 +1213,9 @@ float* RawImageSource::CA_correct_RT(
                            progresscounter++;

                            if (progresscounter % 8 == 0)
+#ifdef _OPENMP
                                #pragma omp critical (cacorrect)
+#endif
                            {
                                progress += 4.0 * SQR(ts - border2) / (iterations * height * width);
                                progress = std::min(progress, 1.0);
@ -1208,7 +1226,9 @@ float* RawImageSource::CA_correct_RT(
                }

                // copy temporary image matrix back to image matrix
+#ifdef _OPENMP
                #pragma omp for
+#endif

                for (int row = cb; row < height - cb; row++) {
                    int col = cb + (FC(row, 0) & 1);
@ -1232,14 +1252,18 @@ float* RawImageSource::CA_correct_RT(
            // of red and blue channel and apply a gaussian blur to them.
            // Then we apply the resulting factors per pixel on the result of raw ca correction

+#ifdef _OPENMP
            #pragma omp parallel
+#endif
            {
 #ifdef __SSE2__
                const vfloat onev = F2V(1.f);
                const vfloat twov = F2V(2.f);
                const vfloat zd5v = F2V(0.5f);
 #endif
+#ifdef _OPENMP
                #pragma omp for
+#endif
                for (int i = 0; i < H - 2 * cb; ++i) {
                    const int firstCol = FC(i, 0) & 1;
                    const int colour = FC(i, firstCol);
@ -1260,7 +1284,9 @@ float* RawImageSource::CA_correct_RT(
                    }
                }

+#ifdef _OPENMP
                #pragma omp single
+#endif
                {
                    if (H % 2) {
                        // odd height => factors are not set in last row => use values of preceding row
@ -1287,7 +1313,9 @@ float* RawImageSource::CA_correct_RT(
                gaussianBlur(*blueFactor, *blueFactor, (W + 1 - 2 * cb) / 2, (H + 1 - 2 * cb) / 2, 30.0);

                // apply correction factors to avoid (reduce) colour shift
+#ifdef _OPENMP
                #pragma omp for
+#endif
                for (int i = 0; i < H - 2 * cb; ++i) {
                    const int firstCol = FC(i, 0) & 1;
                    const int colour = FC(i, firstCol);
--- a/rtengine/EdgePreservingDecomposition.cc
+++ b/rtengine/EdgePreservingDecomposition.cc
@ -324,7 +324,9 @@ void MultiDiagonalSymmetricMatrix::VectorProduct(float* RESTRICT Product, float*
        }

 #endif
+#ifdef _OPENMP
        #pragma omp single
+#endif
        {
 #ifdef __SSE2__

--- a/rtengine/FTblockDN.cc
+++ b/rtengine/FTblockDN.cc
@ -1838,11 +1838,15 @@ BENCHFUN

        for (int iteration = 1; iteration <= dnparams.passes; ++iteration) {

+#ifdef _OPENMP
            #pragma omp parallel
+#endif
            {
                if (methmed < 2)
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 1; i < hei - 1; ++i) {
                        if (methmed == 0) {
@ -1857,7 +1861,9 @@ BENCHFUN
                    }
                } else
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 2; i < hei - 2; ++i) {
                        if (methmed == 3) {
@ -1901,7 +1907,9 @@ BENCHFUN

                if (methmed < 2)
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 1; i < hei - 1; ++i) {
                        if (methmed == 0) {
@ -1916,7 +1924,9 @@ BENCHFUN
                    }
                } else
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 2; i < hei - 2; ++i) {
                        if (methmed == 3) {
@ -1961,7 +1971,9 @@ BENCHFUN

                if (methmed < 2)
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 1; i < hei - 1; ++i) {
                        if (methmed == 0) {
@ -1976,7 +1988,9 @@ BENCHFUN
                    }
                } else
                {
+#ifdef _OPENMP
                    #pragma omp for
+#endif

                    for (int i = 2; i < hei - 2; ++i) {
                        if (methmed == 3) {
--- a/rtengine/bilateral2.h
+++ b/rtengine/bilateral2.h
@ -152,10 +152,14 @@ template<class T, class A> void bilateral05 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(318, 1)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER3(1, 7, 7, 55)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(1)
 }

@ -164,10 +168,14 @@ template<class T, class A> void bilateral06 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(768, 1)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER3(1, 4, 4, 16)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(1)
 }

@ -176,10 +184,14 @@ template<class T, class A> void bilateral07 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(366, 2)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER5(0, 0, 1, 0, 8, 21, 1, 21, 59)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(2)
 }

@ -188,10 +200,14 @@ template<class T, class A> void bilateral08 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(753, 2)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER5(0, 0, 1, 0, 5, 10, 1, 10, 23)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(2)
 }

@ -200,10 +216,14 @@ template<class T, class A> void bilateral09 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(595, 2)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER5(0, 1, 2, 1, 6, 12, 2, 12, 22)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(2)
 }

@ -212,10 +232,14 @@ template<class T, class A> void bilateral10 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(910, 2)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER5(0, 1, 2, 1, 4, 7, 2, 7, 12)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(2)
 }

@ -224,10 +248,14 @@ template<class T, class A> void bilateral11 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(209, 3)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER7(0, 0, 1, 1, 0, 2, 5, 8, 1, 5, 18, 27, 1, 8, 27, 41)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(3)
 }

@ -236,10 +264,14 @@ template<class T, class A> void bilateral12 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(322, 3)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER7(0, 0, 1, 1, 0, 1, 4, 6, 1, 4, 11, 16, 1, 6, 16, 23)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(3)
 }

@ -248,10 +280,14 @@ template<class T, class A> void bilateral13 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(336, 3)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER7(0, 0, 1, 1, 0, 2, 4, 6, 1, 4, 11, 14, 1, 6, 14, 19)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(3)
 }

@ -260,10 +296,14 @@ template<class T, class A> void bilateral14 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(195, 3)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER7(0, 1, 2, 3, 1, 4, 8, 10, 2, 8, 17, 21, 3, 10, 21, 28)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(3)
 }

@ -272,10 +312,14 @@ template<class T, class A> void bilateral15 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(132, 4)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER9(0, 0, 0, 1, 1, 0, 1, 2, 4, 5, 0, 2, 6, 12, 14, 1, 4, 12, 22, 28, 1, 5, 14, 28, 35)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(4)
 }

@ -284,10 +328,14 @@ template<class T, class A> void bilateral16 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(180, 4)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER9(0, 0, 0, 1, 1, 0, 1, 2, 3, 4, 0, 2, 5, 9, 10, 1, 3, 9, 15, 19, 1, 4, 10, 19, 23)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(4)
 }

@ -296,10 +344,14 @@ template<class T, class A> void bilateral17 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(195, 4)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER9(0, 0, 1, 1, 1, 0, 1, 2, 3, 4, 1, 2, 5, 8, 9, 1, 3, 8, 13, 16, 1, 4, 9, 16, 19)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(4)
 }

@ -308,10 +360,14 @@ template<class T, class A> void bilateral18 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(151, 4)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER9(0, 0, 1, 2, 2, 0, 1, 3, 5, 5, 1, 3, 6, 10, 12, 2, 5, 10, 16, 19, 2, 5, 12, 19, 22)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(4)
 }

@ -320,10 +376,14 @@ template<class T, class A> void bilateral19 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(151, 4)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER9(0, 0, 1, 2, 2, 0, 1, 3, 4, 5, 1, 3, 5, 8, 9, 2, 4, 8, 12, 14, 2, 5, 9, 14, 16)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(4)
 }

@ -332,10 +392,14 @@ template<class T, class A> void bilateral20 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(116, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 0, 1, 1, 1, 0, 0, 1, 2, 3, 3, 0, 1, 2, 4, 7, 7, 1, 2, 4, 8, 12, 14, 1, 3, 7, 12, 18, 20, 1, 3, 7, 14, 20, 23)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

@ -344,10 +408,14 @@ template<class T, class A> void bilateral21 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(127, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 0, 1, 1, 1, 0, 0, 1, 2, 3, 3, 0, 1, 2, 4, 6, 7, 1, 2, 4, 8, 11, 12, 1, 3, 6, 11, 15, 17, 1, 3, 7, 12, 17, 19)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

@ -356,10 +424,14 @@ template<class T, class A> void bilateral22 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(109, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 0, 1, 1, 2, 0, 1, 2, 3, 3, 4, 1, 2, 3, 5, 7, 8, 1, 3, 5, 9, 12, 13, 1, 3, 7, 12, 16, 18, 2, 4, 8, 13, 18, 20)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

@ -368,10 +440,14 @@ template<class T, class A> void bilateral23 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(132, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 3, 5, 6, 7, 1, 2, 5, 7, 10, 11, 1, 3, 6, 10, 13, 14, 1, 3, 7, 11, 14, 16)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

@ -380,10 +456,14 @@ template<class T, class A> void bilateral24 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(156, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 3, 4, 5, 6, 1, 2, 4, 6, 8, 9, 1, 3, 5, 8, 10, 11, 1, 3, 6, 9, 11, 12)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

@ -392,10 +472,14 @@ template<class T, class A> void bilateral25 (T** src, T** dst, T** buffer, int W
 {

    BL_BEGIN(173, 5)
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_OPER11(0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 3, 3, 1, 1, 2, 4, 5, 5, 1, 2, 4, 5, 7, 7, 1, 3, 5, 7, 9, 9, 1, 3, 5, 7, 9, 10)
    BL_FREE
+#ifdef _OPENMP
    #pragma omp for
+#endif
    BL_END(5)
 }

--- a/rtengine/cfa_linedn_RT.cc
+++ b/rtengine/cfa_linedn_RT.cc
@ -64,7 +64,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica
    float noisevarm4 = 4.0f * noisevar;
    volatile double progress = 0.0;
    float* RawDataTmp = (float*)malloc( width * height * sizeof(float));
+#ifdef _OPENMP
    #pragma omp parallel
+#endif
    {

        // allocate memory and assure the arrays don't have same 64 byte boundary to avoid L1 conflict misses
@ -76,7 +78,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica
        float linehvar[4], linevvar[4], noisefactor[4][8][2], coeffsq;
        float dctblock[4][8][8];

+#ifdef _OPENMP
        #pragma omp for
+#endif

        for(int i = 0; i < height; i++)
            for(int j = 0; j < width; j++) {
@ -84,7 +88,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica
            }

        // Main algorithm: Tile loop
+#ifdef _OPENMP
        #pragma omp for schedule(dynamic) collapse(2)
+#endif

        for (int top = 0; top < height - 16; top += TS - 32)
            for (int left = 0; left < width - 16; left += TS - 32) {
@ -251,7 +257,9 @@ void RawImageSource::CLASS cfa_linedn(float noise, bool horizontal, bool vertica
        free(cfain);

 // copy temporary buffer back to image matrix
+#ifdef _OPENMP
        #pragma omp for schedule(dynamic,16)
+#endif

        for(int i = 0; i < height; i++) {
            float f = rowblender(i);
--- a/rtengine/dcraw.cc
+++ b/rtengine/dcraw.cc
@ -1695,7 +1695,9 @@ void CLASS phase_one_correct()
            curve[i] = LIM(num+i,0,65535);
            }
            apply:					/* apply to whole image */
+#ifdef _OPENMP
            #pragma omp parallel for schedule(dynamic,16)
+#endif
            for (int row=0; row < raw_height; row++) {
                for (int col = (tag & 1)*ph1.split_col; col < raw_width; col++) {
                    RAW(row,col) = curve[RAW(row,col)];
@ -1770,8 +1772,10 @@ void CLASS phase_one_correct()
 	                cx[17] = cf[17] = ((unsigned) ref[15] * 65535) / lc[qr][qc][15];
 	                cx[18] = cf[18] = 65535;
 	                cubic_spline(cx, cf, 19);
+#ifdef _OPENMP
 	                #pragma omp parallel for schedule(dynamic,16)
-                    for (int row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++)
+#endif
+	                for (int row = (qr ? ph1.split_row : 0); row < (qr ? raw_height : ph1.split_row); row++)
                        for (int col = (qc ? ph1.split_col : 0); col < (qc ? raw_width : ph1.split_col); col++)
                            RAW(row,col) = curve[RAW(row,col)];
 	            }
@ -1787,7 +1791,9 @@ void CLASS phase_one_correct()
            qmult[1][0] = 1.0 + getreal(11);
            get4(); get4(); get4();
            qmult[1][1] = 1.0 + getreal(11);
+#ifdef _OPENMP
            #pragma omp parallel for schedule(dynamic,16)
+#endif
            for (int row=0; row < raw_height; row++) {
                for (int col=0; col < raw_width; col++) {
                    int i = qmult[row >= ph1.split_row][col >= ph1.split_col] * RAW(row,col);
@ -2329,7 +2335,9 @@ void CLASS hasselblad_correct()
        }

        // apply flatfield
+#ifdef _OPENMP
 #pragma omp parallel for
+#endif
        for (int row = 0; row < raw_height; row++) {
            int ffs, cur_ffr, i, c;
            if (row < row_offset) {
@ -4470,7 +4478,9 @@ void CLASS crop_masked_pixels()
      }
    }
  } else {
+#ifdef _OPENMP
 #pragma omp parallel for
+#endif
    for (int row=0; row < height; row++)
      for (int col=0; col < width; col++)
 	BAYER2(row,col) = RAW(row+top_margin,col+left_margin);
--- a/rtengine/demosaic_algos.cc
+++ b/rtengine/demosaic_algos.cc
@ -1528,7 +1528,7 @@ void RawImageSource::igv_interpolate(int winw, int winh)
    const int v1 = 1 * width, v2 = 2 * width, v3 = 3 * width, v4 = 4 * width, v5 = 5 * width, v6 = 6 * width;
    float* rgb[3];
    float* chr[2];
-    float (*rgbarray), *vdif, *hdif, (*chrarray);
+    float *rgbarray, *vdif, *hdif, *chrarray;

    rgbarray    = (float (*)) calloc(width * height * 3, sizeof( float));
    rgb[0] = rgbarray;
@ -1789,7 +1789,9 @@ void RawImageSource::nodemosaic(bool bw)
    red(W, H);
    green(W, H);
    blue(W, H);
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif

    for (int i = 0; i < H; i++) {
        for (int j = 0; j < W; j++) {
--- a/rtengine/dfmanager.cc
+++ b/rtengine/dfmanager.cc
@ -219,10 +219,14 @@ void dfInfo::updateBadPixelList( RawImage *df )
    if( df->getSensorType() == ST_BAYER || df->getSensorType() == ST_FUJI_XTRANS ) {
        std::vector<badPix> badPixelsTemp;

+#ifdef _OPENMP
        #pragma omp parallel
+#endif
        {
            std::vector<badPix> badPixelsThread;
+#ifdef _OPENMP
            #pragma omp for nowait
+#endif

            for( int row = 2; row < df->get_height() - 2; row++)
                for( int col = 2; col < df->get_width() - 2; col++) {
@ -235,7 +239,9 @@ void dfInfo::updateBadPixelList( RawImage *df )
                    }
                }

+#ifdef _OPENMP
            #pragma omp critical
+#endif
            badPixelsTemp.insert(badPixelsTemp.end(), badPixelsThread.begin(), badPixelsThread.end());
        }
        badPixels.insert(badPixels.end(), badPixelsTemp.begin(), badPixelsTemp.end());
--- a/rtengine/dirpyr_equalizer.cc
+++ b/rtengine/dirpyr_equalizer.cc
@ -143,7 +143,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
        }

 #ifdef __SSE2__
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for(int i = 0; i < srcheight; i++) {
            int j;
@ -158,7 +160,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
        }

 #else
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for(int i = 0; i < srcheight; i++) {
            for(int j = 0; j < srcwidth; j++) {
@ -174,10 +178,14 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
        }

 #ifdef __SSE2__
+#ifdef _OPENMP
        #pragma omp parallel
+#endif
        {
            __m128 div = _mm_set1_ps(327.68f);
+#ifdef _OPENMP
            #pragma omp for
+#endif

            for(int i = 0; i < srcheight; i++) {
                int j;
@ -192,7 +200,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
            }
        }
 #else
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for(int i = 0; i < srcheight; i++) {
            for(int j = 0; j < srcwidth; j++) {
@ -228,7 +238,9 @@ void ImProcFunctions :: dirpyr_equalizer(float ** src, float ** dst, int srcwidt
        delete [] tmpHue;
    }

+#ifdef _OPENMP
    #pragma omp parallel for
+#endif

    for (int i = 0; i < srcheight; i++)
        for (int j = 0; j < srcwidth; j++) {
--- a/rtengine/dual_demosaic_RT.cc
+++ b/rtengine/dual_demosaic_RT.cc
@ -91,9 +91,13 @@ void RawImageSource::dual_demosaic_RT(bool isBayer, const RAWParams &raw, int wi
                                { 0.019334, 0.119193, 0.950227 }
                                };

+#ifdef _OPENMP
    #pragma omp parallel
+#endif
    {
+#ifdef _OPENMP
        #pragma omp for
+#endif
        for(int i = 0; i < winh; ++i) {
            Color::RGB2L(red[i], green[i], blue[i], L[i], xyz_rgb, winw);
        }
@ -106,19 +110,25 @@ void RawImageSource::dual_demosaic_RT(bool isBayer, const RAWParams &raw, int wi
    contrast = contrastf * 100.f;

    // the following is split into 3 loops intentionally to avoid cache conflicts on CPUs with only 4-way cache
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif
    for(int i = 0; i < winh; ++i) {
        for(int j = 0; j < winw; ++j) {
            red[i][j] = intp(blend[i][j], red[i][j], redTmp[i][j]);
        }
    }
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif
    for(int i = 0; i < winh; ++i) {
        for(int j = 0; j < winw; ++j) {
            green[i][j] = intp(blend[i][j], green[i][j], greenTmp[i][j]);
        }
    }
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif
    for(int i = 0; i < winh; ++i) {
        for(int j = 0; j < winw; ++j) {
            blue[i][j] = intp(blend[i][j], blue[i][j], blueTmp[i][j]);
--- a/rtengine/eahd_demosaic.cc
+++ b/rtengine/eahd_demosaic.cc
@ -431,7 +431,9 @@ void RawImageSource::eahd_demosaic ()
        }

    // Interpolate R and B
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif
    for (int i = 0; i < H; i++) {
        if (i == 0) {
            interpolate_row_rb_mul_pp (rawData, red[i], blue[i], nullptr, green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1);
@ -443,4 +445,4 @@ void RawImageSource::eahd_demosaic ()
    }
 }

-}
+}
--- a/rtengine/expo_before_b.cc
+++ b/rtengine/expo_before_b.cc
@ -63,13 +63,17 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D<flo
        // No highlight protection - simple mutiplication

        if (ri->getSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS)
+#ifdef _OPENMP
            #pragma omp parallel for
+#endif
            for (int row = 0; row < height; row++)
                for (int col = 0; col < width; col++) {
                    rawData[row][col] *= expos;
                }
        else
+#ifdef _OPENMP
            #pragma omp parallel for
+#endif
            for (int row = 0; row < height; row++)
                for (int col = 0; col < width; col++) {
                    rawData[row][col * 3] *= expos;
@ -88,12 +92,16 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D<flo

        // Find maximum to adjust LUTs. New float engines clips only at the very end
        float maxValFloat = 0.f;
+#ifdef _OPENMP
        #pragma omp parallel
+#endif
        {
            float maxValFloatThr = 0.f;

            if (ri->getSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS)
+#ifdef _OPENMP
                #pragma omp for schedule(dynamic,16) nowait
+#endif
                for(int row = 0; row < height; row++)
                    for (int col = 0; col < width; col++) {
                        if (rawData[row][col] > maxValFloatThr) {
@ -101,7 +109,9 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D<flo
                        }
                    }
            else
+#ifdef _OPENMP
                #pragma omp for schedule(dynamic,16) nowait
+#endif
                for(int row = 0; row < height; row++)
                    for (int col = 0; col < width; col++) {
                        for (int c = 0; c < 3; c++)
@ -110,7 +120,9 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D<flo
                            }
                    }

+#ifdef _OPENMP
            #pragma omp critical
+#endif
            {
                if(maxValFloatThr > maxValFloat) {
                    maxValFloat = maxValFloatThr;
@ -141,14 +153,18 @@ void RawImageSource::processRawWhitepoint(float expos, float preser, array2D<flo
        }

        if (ri->getSensorType() == ST_BAYER || ri->getSensorType() == ST_FUJI_XTRANS)
+#ifdef _OPENMP
            #pragma omp parallel for schedule(dynamic,16)
+#endif
            for(int row = 0; row < height; row++)
                for(int col = 0; col < width; col++) {
                    float lumi = 0.299f * red[row][col] + 0.587f * green[row][col] + 0.114f * blue[row][col];
                    rawData[row][col] *= lumi < K ? expos : lut[lumi];
                }
        else
+#ifdef _OPENMP
            #pragma omp parallel for
+#endif
            for(int row = 0; row < height; row++)
                for(int col = 0; col < width; col++) {
                    float lumi = 0.299f * rawData[row][col * 3] + 0.587f * rawData[row][col * 3 + 1] + 0.114f * rawData[row][col * 3 + 2];
--- a/rtengine/fast_demo.cc
+++ b/rtengine/fast_demo.cc
@ -77,10 +77,10 @@ void RawImageSource::fast_demosaic()
 #endif
    {

-        char (*buffer);
-        float (*greentile);
-        float (*redtile);
-        float (*bluetile);
+        char *buffer;
+        float *greentile;
+        float *redtile;
+        float *bluetile;
 #define CLF 1
        // assign working space
        buffer = (char *) calloc(3 * sizeof(float) * TS * TS + 3 * CLF * 64 + 63, 1);
--- a/rtengine/hilite_recon.cc
+++ b/rtengine/hilite_recon.cc
@ -336,7 +336,9 @@ void RawImageSource::boxblur_resamp(float **src, float **dst, float ** temp, int
        }

        // process remaining columns
+#ifdef _OPENMP
        #pragma omp single
+#endif
        {

            //vertical blur
--- a/rtengine/hphd_demosaic_RT.cc
+++ b/rtengine/hphd_demosaic_RT.cc
@ -52,7 +52,9 @@ void hphd_vertical(const array2D<float> &rawData, float** hpmap, int col_from, i
 #endif
    for (; k < col_to - 7; k += numCols) {
        for (int i = 5; i < H - 5; i++) {
+#ifdef _OPENMP
            #pragma omp simd
+#endif
            for(int h = 0; h < numCols; ++h) {
                temp[i][h] = std::fabs((rawData[i - 5][k + h] - rawData[i + 5][k + h])  - 8 * (rawData[i - 4][k + h] - rawData[i + 4][k + h]) + 27 * (rawData[i - 3][k + h] - rawData[i + 3][k + h]) - 48 * (rawData[i - 2][k + h] - rawData[i + 2][k + h]) + 42 * (rawData[i - 1][k + h] - rawData[i + 1][k + h]));
            }
@ -68,7 +70,9 @@ void hphd_vertical(const array2D<float> &rawData, float** hpmap, int col_from, i
            STVFU(avg[j][4], avgL2);
            STVFU(dev[j][4], vmaxf(epsv, (SQRV(LVFU(temp[j - 4][4]) - avgL2) + SQRV(LVFU(temp[j - 3][4]) - avgL2)) + (SQRV(LVFU(temp[j - 2][4]) - avgL2) + SQRV(LVFU(temp[j - 1][4]) - avgL2)) + (SQRV(LVFU(temp[j][4]) - avgL2) + SQRV(LVFU(temp[j + 1][4]) - avgL2)) + (SQRV(LVFU(temp[j + 2][4]) - avgL2) + SQRV(LVFU(temp[j + 3][4]) - avgL2)) + SQRV(LVFU(temp[j + 4][4]) - avgL2)));
 #else
+#ifdef _OPENMP
            #pragma omp simd
+#endif
            for(int h = 0; h < numCols; ++h) {
                const float avgL = ((temp[j - 4][h] + temp[j - 3][h]) + (temp[j - 2][h] + temp[j - 1][h]) + (temp[j][h] + temp[j + 1][h]) + (temp[j + 2][h] + temp[j + 3][h]) + temp[j + 4][h]) / 9.f;
                avg[j][h] = avgL;
@ -78,7 +82,9 @@ void hphd_vertical(const array2D<float> &rawData, float** hpmap, int col_from, i
        }

        for (int j = 5; j < H - 5; j++) {
+#ifdef _OPENMP
            #pragma omp simd
+#endif
            for(int h = 0; h < numCols; ++h) {
                const float avgL = avg[j - 1][h];
                const float avgR = avg[j + 1][h];
@ -126,12 +132,16 @@ void hphd_horizontal(const array2D<float> &rawData, float** hpmap, int row_from,
    const vfloat zd8v = F2V(0.8f);
 #endif
    for (int i = row_from; i < row_to; i++) {
+#ifdef _OPENMP
        #pragma omp simd
+#endif
        for (int j = 5; j < W - 5; j++) {
            temp[j] = std::fabs((rawData[i][j - 5] - rawData[i][j + 5]) - 8 * (rawData[i][j - 4] - rawData[i][j + 4]) + 27 * (rawData[i][j - 3] - rawData[i][j + 3]) - 48 * (rawData[i][j - 2] - rawData[i][j + 2]) + 42 * (rawData[i][j - 1] - rawData[i][j + 1]));
        }

+#ifdef _OPENMP
        #pragma omp simd
+#endif
        for (int j = 4; j < W - 4; j++) {
            const float avgL = ((temp[j - 4] + temp[j - 3]) + (temp[j - 2] + temp[j - 1]) + (temp[j] + temp[j + 1]) + (temp[j + 2] + temp[j + 3]) + temp[j + 4]) / 9.f;
            avg[j] = avgL;
@ -301,7 +311,7 @@ void RawImageSource::hphd_demosaic ()
        }
    }
 #else
-    hphd_vertical(hpmap, 0, W, H);
+    hphd_vertical(rawData, hpmap, 0, W, H);
 #endif

    if (plistener) {
@ -322,7 +332,7 @@ void RawImageSource::hphd_demosaic ()
        }
    }
 #else
-    hphd_horizontal(hpmap, 0, H);
+    hphd_horizontal(rawData, hpmap, 0, H, W);
 #endif

    if (plistener) {
@ -335,7 +345,9 @@ void RawImageSource::hphd_demosaic ()
        plistener->setProgress(0.65);
    }

+#ifdef _OPENMP
    #pragma omp parallel for
+#endif
    for (int i = 4; i < H - 4; i++) {
        interpolate_row_rb_mul_pp(rawData, red[i], blue[i], green[i - 1], green[i], green[i + 1], i, 1.0, 1.0, 1.0, 0, W, 1);
    }
--- a/rtengine/imagefloat.cc
+++ b/rtengine/imagefloat.cc
@ -108,8 +108,6 @@ void Imagefloat::setScanline (int row, unsigned char* buffer, int bps, unsigned
 }


-namespace rtengine { extern void filmlike_clip(float *r, float *g, float *b); }
-
 void Imagefloat::getScanline (int row, unsigned char* buffer, int bps, bool isFloat) const
 {

@ -441,11 +439,15 @@ void Imagefloat::calcCroppedHistogram(const ProcParams &params, float scale, LUT
    int x1, x2, y1, y2;
    params.crop.mapToResized(width, height, scale, x1, x2, y1, y2);

+#ifdef _OPENMP
    #pragma omp parallel
+#endif
    {
        LUTu histThr(65536);
        histThr.clear();
+#ifdef _OPENMP
        #pragma omp for nowait
+#endif

        for (int y = y1; y < y2; y++) {
            for (int x = x1; x < x2; x++) {
@ -461,7 +463,9 @@ void Imagefloat::calcCroppedHistogram(const ProcParams &params, float scale, LUT
            }
        }

+#ifdef _OPENMP
        #pragma omp critical
+#endif
        {
            for(int i = 0; i <= 0xffff; i++) {
                hist[i] += histThr[i];
--- a/rtengine/improccoordinator.cc
+++ b/rtengine/improccoordinator.cc
@ -1048,9 +1048,13 @@ void ImProcCoordinator::updateLRGBHistograms()
    int x1, y1, x2, y2;
    params.crop.mapToResized(pW, pH, scale, x1, x2, y1, y2);

+#ifdef _OPENMP
    #pragma omp parallel sections
+#endif
    {
+#ifdef _OPENMP
        #pragma omp section
+#endif
        {
            histChroma.clear();

@ -1060,7 +1064,9 @@ void ImProcCoordinator::updateLRGBHistograms()
                    histChroma[(int)(sqrtf(SQR(nprevl->a[i][j]) + SQR(nprevl->b[i][j])) / 188.f)]++;      //188 = 48000/256
                }
        }
+#ifdef _OPENMP
        #pragma omp section
+#endif
        {
            histLuma.clear();

@ -1070,7 +1076,9 @@ void ImProcCoordinator::updateLRGBHistograms()
                    histLuma[(int)(nprevl->L[i][j] / 128.f)]++;
                }
        }
+#ifdef _OPENMP
        #pragma omp section
+#endif
        {
            histRed.clear();
            histGreen.clear();
@ -1302,7 +1310,9 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a
        int cy = params.crop.y;
        int cw = params.crop.w;
        int ch = params.crop.h;
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int i = cy; i < cy + ch; i++) {
            for (int j = cx; j < cx + cw; j++) {
@ -1317,7 +1327,9 @@ void ImProcCoordinator::saveInputICCReference(const Glib::ustring& fname, bool a
    }

    // image may contain out of range samples, clip them to avoid wrap-arounds
+#ifdef _OPENMP
    #pragma omp parallel for
+#endif

    for (int i = 0; i < im->getHeight(); i++) {
        for (int j = 0; j < im->getWidth(); j++) {
--- a/rtengine/improcfun.cc
+++ b/rtengine/improcfun.cc
@ -777,7 +777,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
                    hist16Qthr.clear();
                }

+#ifdef _OPENMP
                #pragma omp for reduction(+:sum)
+#endif


                for (int i = 0; i < height; i++)
@ -846,7 +848,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
                        //can be used in case of...
                    }

+#ifdef _OPENMP
                #pragma omp critical
+#endif
                {
                    if (needJ) {
                        hist16J += hist16Jthr;
@ -995,7 +999,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
        int bufferLength = ((width + 3) / 4) * 4; // bufferLength has to be a multiple of 4
 #endif
 #ifndef _DEBUG
+#ifdef _OPENMP
        #pragma omp parallel
+#endif
 #endif
        {
            float minQThr = 10000.f;
@ -1010,7 +1016,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
            float sbuffer[bufferLength] ALIGNED16;
 #endif
 #ifndef _DEBUG
+#ifdef _OPENMP
            #pragma omp for schedule(dynamic, 16)
+#endif
 #endif

            for (int i = 0; i < height; i++) {
@ -1618,7 +1626,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
 #endif
            }

+#ifdef _OPENMP
            #pragma omp critical
+#endif
            {
                if (minQThr < minQ) {
                    minQ = minQThr;
@ -1731,11 +1741,15 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw


 #ifndef _DEBUG
+#ifdef _OPENMP
                #pragma omp parallel
+#endif
 #endif
                {
 #ifndef _DEBUG
+#ifdef _OPENMP
                    #pragma omp for schedule(dynamic, 10)
+#endif
 #endif

                    for (int i = 0; i < height; i++) // update CieImages with new values after sharpening, defringe, contrast by detail level
@ -1768,7 +1782,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
            const float co_e = (pow_F (f_l, 0.25f)) + eps;

 #ifndef _DEBUG
+#ifdef _OPENMP
            #pragma omp parallel
+#endif
 #endif
            {
 #ifdef __SSE2__
@ -1782,7 +1798,9 @@ void ImProcFunctions::ciecam_02float (CieImage* ncie, float adap, int pW, int pw
 #endif

 #ifndef _DEBUG
+#ifdef _OPENMP
                #pragma omp for schedule(dynamic, 10)
+#endif
 #endif

                for (int i = 0; i < height; i++) { // update CIECAM with new values after tone-mapping
@ -4120,7 +4138,9 @@ void ImProcFunctions::luminanceCurve (LabImage* lold, LabImage* lnew, LUTf & cur
    int W = lold->W;
    int H = lold->H;

+#ifdef _OPENMP
    #pragma omp parallel for if (multiThread)
+#endif

    for (int i = 0; i < H; i++)
        for (int j = 0; j < W; j++) {
@ -4386,17 +4406,21 @@ void ImProcFunctions::chromiLuminanceCurve (PipetteBuffer *pipetteBuffer, int pW
        {wprof[2][0], wprof[2][1], wprof[2][2]}
    };

+#ifdef _OPENMP
 #ifdef _DEBUG
    #pragma omp parallel default(shared) firstprivate(lold, lnew, MunsDebugInfo, pW) if (multiThread)
 #else
    #pragma omp parallel if (multiThread)
+#endif
 #endif
    {
 #ifdef __SSE2__
        float HHBuffer[W] ALIGNED16;
        float CCBuffer[W] ALIGNED16;
 #endif
+#ifdef _OPENMP
        #pragma omp for schedule(dynamic, 16)
+#endif

        for (int i = 0; i < H; i++) {
            if (avoidColorShift)
@ -5136,7 +5160,9 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi

    EdgePreservingDecomposition epd (Wid, Hei);

+#ifdef _OPENMP
    #pragma omp parallel for
+#endif

    for (int i = 0; i < Hei; i++)
        for (int j = 0; j < Wid; j++) {
@ -5162,7 +5188,9 @@ void ImProcFunctions::EPDToneMapCIE (CieImage *ncie, float a_w, float c_, int Wi
    //Restore past range, also desaturate a bit per Mantiuk's Color correction for tone mapping.
    float s = (1.0f + 38.7889f) * powf (Compression, 1.5856f) / (1.0f + 38.7889f * powf (Compression, 1.5856f));
 #ifndef _DEBUG
+#ifdef _OPENMP
    #pragma omp parallel for schedule(dynamic,10)
+#endif
 #endif

    for (int i = 0; i < Hei; i++)
@ -5243,11 +5271,15 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip
    //Due to the taking of logarithms, L must be nonnegative. Further, scale to 0 to 1 using nominal range of L, 0 to 15 bit.
    float minL = FLT_MAX;
    float maxL = 0.f;
+#ifdef _OPENMP
    #pragma omp parallel
+#endif
    {
        float lminL = FLT_MAX;
        float lmaxL = 0.f;
+#ifdef _OPENMP
        #pragma omp for
+#endif

        for (size_t i = 0; i < N; i++) {
            if (L[i] < lminL) {
@ -5259,7 +5291,9 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip
            }
        }

+#ifdef _OPENMP
        #pragma omp critical
+#endif
        {
            if (lminL < minL) {
                minL = lminL;
@ -5279,7 +5313,9 @@ void ImProcFunctions::EPDToneMap (LabImage *lab, unsigned int Iterates, int skip
        maxL = 1.f;
    }

+#ifdef _OPENMP
    #pragma omp parallel for
+#endif

    for (size_t i = 0; i < N; ++i)
        //{L[i] = (L[i] - minL)/32767.0f;
--- a/rtengine/ipresize.cc
+++ b/rtengine/ipresize.cc
@ -54,7 +54,9 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca
    const float sc = min (scale, 1.0f);
    const int support = static_cast<int> (2.0f * a / sc) + 1;

+#ifdef _OPENMP
    #pragma omp parallel
+#endif
    {
        // storage for precomputed parameters for horisontal interpolation
        float * wwh = new float[support * dst->getWidth()];
@ -97,7 +99,9 @@ void ImProcFunctions::Lanczos (const Imagefloat* src, Imagefloat* dst, float sca
        }

        // Phase 2: do actual interpolation
+#ifdef _OPENMP
        #pragma omp for
+#endif

        for (int i = 0; i < dst->getHeight(); i++) {

--- a/rtengine/iptransform.cc
+++ b/rtengine/iptransform.cc
@ -705,7 +705,9 @@ void ImProcFunctions::transformLuminanceOnly (Imagefloat* original, Imagefloat*
    }

    bool darkening = (params->vignetting.amount <= 0.0);
+#ifdef _OPENMP
    #pragma omp parallel for schedule(dynamic,16) if (multiThread)
+#endif

    for (int y = 0; y < transformed->getHeight(); y++) {
        double vig_y_d = applyVignetting ? (double) (y + cy) - vig_h2 : 0.0;
@ -817,7 +819,9 @@ void ImProcFunctions::transformGeneral(bool highQuality, Imagefloat *original, I
 #endif
    // main cycle
    bool darkening = (params->vignetting.amount <= 0.0);
+#ifdef _OPENMP
    #pragma omp parallel for if (multiThread)
+#endif

    for (int y = 0; y < transformed->getHeight(); y++) {
        for (int x = 0; x < transformed->getWidth(); x++) {
@ -964,7 +968,9 @@ void ImProcFunctions::transformLCPCAOnly(Imagefloat *original, Imagefloat *trans
    chTrans[1] = transformed->g.ptrs;
    chTrans[2] = transformed->b.ptrs;

+#ifdef _OPENMP
    #pragma omp parallel for if (multiThread)
+#endif

    for (int y = 0; y < transformed->getHeight(); y++) {
        for (int x = 0; x < transformed->getWidth(); x++) {
--- a/rtengine/ipvibrance.cc
+++ b/rtengine/ipvibrance.cc
@ -40,8 +40,6 @@ namespace rtengine

 using namespace procparams;

-extern const Settings* settings;
-
 void fillCurveArrayVib (DiagonalCurve* diagCurve, LUTf &outCurve)
 {

@ -170,7 +168,9 @@ void ImProcFunctions::vibrance (LabImage* lab)
        {static_cast<float>(wiprof[2][0]), static_cast<float>(wiprof[2][1]), static_cast<float>(wiprof[2][2])}
    };

+#ifdef _OPENMP
    #pragma omp parallel if (multiThread)
+#endif
    {

 #ifdef __SSE2__
@ -179,7 +179,9 @@ void ImProcFunctions::vibrance (LabImage* lab)
 #endif
        float sathue[5], sathue2[4]; // adjust sat in function of hue

+#ifdef _OPENMP
        #pragma omp for schedule(dynamic, 16)
+#endif

        for (int i = 0; i < height; i++) {
 #ifdef __SSE2__
--- a/rtengine/previewimage.cc
+++ b/rtengine/previewimage.cc
@ -77,7 +77,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext
                previewImage = Cairo::ImageSurface::create(Cairo::FORMAT_RGB24, w, h);
                previewImage->flush();

+#ifdef _OPENMP
                #pragma omp parallel for
+#endif
                for (unsigned int i = 0; i < (unsigned int)(h); ++i) {
                    const unsigned char *src = data + i * w * 3;
                    unsigned char *dst = previewImage->get_data() + i * w * 4;
@ -119,7 +121,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext
            rawImage.getImage (wb, TR_NONE, &image, pp, params.toneCurve, params.raw);
            rtengine::Image8 output(fw, fh);
            rawImage.convertColorSpace(&image, params.icm, wb);
+#ifdef _OPENMP
            #pragma omp parallel for schedule(dynamic, 10)
+#endif
            for (int i = 0; i < fh; ++i)
                for (int j = 0; j < fw; ++j) {
                    image.r(i, j) = Color::gamma2curve[image.r(i, j)];
@ -139,7 +143,9 @@ PreviewImage::PreviewImage (const Glib::ustring &fname, const Glib::ustring &ext
                previewImage = Cairo::ImageSurface::create(Cairo::FORMAT_RGB24, w, h);
                previewImage->flush();

-                #pragma omp parallel for 
+#ifdef _OPENMP
+                #pragma omp parallel for
+#endif
                for (unsigned int i = 0; i < (unsigned int)(h); i++) {
                    const unsigned char *src = data + i * w * 3;
                    unsigned char *dst = previewImage->get_data() + i * w * 4;
--- a/rtengine/rawimage.cc
+++ b/rtengine/rawimage.cc
@ -127,7 +127,9 @@ void RawImage::get_colorsCoeff( float *pre_mul_, float *scale_mul_, float *cblac
            dsum[FC(1, 0) + 4] += (int)(((W + 1) / 2) * (H / 2));
            dsum[FC(1, 1) + 4] += (int)((W / 2) * (H / 2));

+#ifdef _OPENMP
            #pragma omp parallel private(val)
+#endif
            {
                double dsumthr[8];
                memset(dsumthr, 0, sizeof dsumthr);
@ -142,7 +144,9 @@ void RawImage::get_colorsCoeff( float *pre_mul_, float *scale_mul_, float *cblac
                }

                float *tempdata = data[0];
+#ifdef _OPENMP
                #pragma omp for nowait
+#endif

                for (size_t row = 0; row < H; row += 8) {
                    size_t ymax = row + 8 < H ? row + 8 : H;
@ -176,7 +180,9 @@ skip_block2:
                    }
                }

+#ifdef _OPENMP
                #pragma omp critical
+#endif
                {
                    for (int c = 0; c < 4; c++) {
                        dsum[c] += dsumthr[c];
@ -194,7 +200,9 @@ skip_block2:
            }

        } else if(isXtrans()) {
+#ifdef _OPENMP
            #pragma omp parallel
+#endif
            {
                double dsumthr[8];
                memset(dsumthr, 0, sizeof dsumthr);
@ -209,7 +217,9 @@ skip_block2:
                    whitefloat[c] = this->get_white(c) - whiteThreshold;
                }

+#ifdef _OPENMP
                #pragma omp for nowait
+#endif

                for (size_t row = 0; row < H; row += 8)
                    for (size_t col = 0; col < W ; col += 8)
@ -239,7 +249,9 @@ skip_block3:
                        ;
                    }

+#ifdef _OPENMP
                #pragma omp critical
+#endif
                {
                    for (int c = 0; c < 8; c++)
                    {
@ -716,7 +728,9 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage)

    // copy pixel raw data: the compressed format earns space
    if( float_raw_image ) {
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int row = 0; row < height; row++)
            for (int col = 0; col < width; col++) {
@ -726,21 +740,27 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage)
        delete [] float_raw_image;
        float_raw_image = nullptr;
    } else if (filters != 0 && !isXtrans()) {
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int row = 0; row < height; row++)
            for (int col = 0; col < width; col++) {
                this->data[row][col] = image[row * width + col][FC(row, col)];
            }
    } else if (isXtrans()) {
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int row = 0; row < height; row++)
            for (int col = 0; col < width; col++) {
                this->data[row][col] = image[row * width + col][XTRANSFC(row, col)];
            }
    } else if (colors == 1) {
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int row = 0; row < height; row++)
            for (int col = 0; col < width; col++) {
@ -751,7 +771,9 @@ float** RawImage::compress_image(unsigned int frameNum, bool freeImage)
            height -= top_margin;
            width -= left_margin;
        }
+#ifdef _OPENMP
        #pragma omp parallel for
+#endif

        for (int row = 0; row < height; row++)
            for (int col = 0; col < width; col++) {
--- a/rtengine/rawimagesource.cc
+++ b/rtengine/rawimagesource.cc
@ -3407,7 +3407,9 @@ void RawImageSource::cfaboxblur(RawImage *riFlatFile, float* cfablur, int boxH,

            }

+#ifdef _OPENMP
            #pragma omp single
+#endif

            for (int col = W - (W % 8); col < W; col++) {
                int len = boxH / 2 + 1;
--- a/rtengine/simpleprocess.cc
+++ b/rtengine/simpleprocess.cc
@ -335,13 +335,17 @@ private:
                LUTf gamcurve (65536, 0);
                float gam, gamthresh, gamslope;
                ipf.RGB_denoise_infoGamCurve (params.dirpyrDenoise, imgsrc->isRAW(), gamcurve, gam, gamthresh, gamslope);
+#ifdef _OPENMP
                #pragma omp parallel
+#endif
                {
                    Imagefloat *origCropPart;//init auto noise
                    origCropPart = new Imagefloat (crW, crH);//allocate memory
                    Imagefloat *provicalc = new Imagefloat ((crW + 1) / 2, (crH + 1) / 2); //for denoise curves
                    int skipP = 1;
+#ifdef _OPENMP
                    #pragma omp for schedule(dynamic) collapse(2) nowait
+#endif

                    for (int wcr = 0; wcr < numtiles_W; wcr++) {
                        for (int hcr = 0; hcr < numtiles_H; hcr++) {
@ -557,13 +561,17 @@ private:
                coordH[0] = begH;
                coordH[1] = fh / 2 - crH / 2;
                coordH[2] = fh - crH - begH;
+#ifdef _OPENMP
                #pragma omp parallel
+#endif
                {
                    Imagefloat *origCropPart;//init auto noise
                    origCropPart = new Imagefloat (crW, crH);//allocate memory
                    Imagefloat *provicalc = new Imagefloat ((crW + 1) / 2, (crH + 1) / 2); //for denoise curves

+#ifdef _OPENMP
                    #pragma omp for schedule(dynamic) collapse(2) nowait
+#endif

                    for (int wcr = 0; wcr <= 2; wcr++) {
                        for (int hcr = 0; hcr <= 2; hcr++) {
@ -807,7 +815,9 @@ private:
        if (denoiseParams.enabled  && (noiseLCurve || noiseCCurve )) {
            // we only need image reduced to 1/4 here
            calclum = new Imagefloat ((fw + 1) / 2, (fh + 1) / 2); //for luminance denoise curve
+#ifdef _OPENMP
            #pragma omp parallel for
+#endif

            for (int ii = 0; ii < fh; ii += 2) {
                for (int jj = 0; jj < fw; jj += 2) {
@ -1031,7 +1041,9 @@ private:
                        hist16thr[ (int) ((labView->L[i][j]))]++;
                    }

+#ifdef _OPENMP
                #pragma omp critical
+#endif
                {
                    hist16 += hist16thr;
                }
--- a/rtengine/tmo_fattal02.cc
+++ b/rtengine/tmo_fattal02.cc
@ -187,7 +187,9 @@ void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread)
    Array2Df T (width, height);

    //--- X blur
+#ifdef _OPENMP
    #pragma omp parallel for shared(I, T) if(multithread)
+#endif

    for ( int y = 0 ; y < height ; y++ ) {
        for ( int x = 1 ; x < width - 1 ; x++ ) {
@ -202,7 +204,9 @@ void gaussianBlur (const Array2Df& I, Array2Df& L, bool multithread)
    }

    //--- Y blur
+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for ( int x = 0 ; x < width - 7 ; x += 8 ) {
        for ( int y = 1 ; y < height - 1 ; y++ ) {
@ -279,7 +283,9 @@ float calculateGradients (Array2Df* H, Array2Df* G, int k, bool multithread)
    const float divider = pow ( 2.0f, k + 1 );
    double avgGrad = 0.0; // use double precision for large summations

+#ifdef _OPENMP
    #pragma omp parallel for reduction(+:avgGrad) if(multithread)
+#endif

    for ( int y = 0 ; y < height ; y++ ) {
        int n = (y == 0 ? 0 : y - 1);
@ -354,7 +360,9 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],

    fi[nlevels - 1] = new Array2Df (width, height);

+#ifdef _OPENMP
    #pragma omp parallel for shared(fi) if(multithread)
+#endif
    for ( int k = 0 ; k < width * height ; k++ ) {
        (*fi[nlevels - 1]) (k) = 1.0f;
    }
@ -366,7 +374,9 @@ void calculateFiMatrix (Array2Df* FI, Array2Df* gradients[],
        // only apply gradients to levels>=detail_level but at least to the coarsest
        if ((k >= detail_level || k == nlevels - 1) && beta != 1.f)  {
            //DEBUG_STR << "calculateFiMatrix: apply gradient to level " << k << endl;
+#ifdef _OPENMP
            #pragma omp parallel for shared(fi,avgGrad) if(multithread)
+#endif
            for ( int y = 0; y < height; y++ ) {
                for ( int x = 0; x < width; x++ ) {
                    float grad = ((*gradients[k]) (x, y) < 1e-4f) ? 1e-4 : (*gradients[k]) (x, y);
@ -455,7 +465,9 @@ void tmo_fattal02 (size_t width,
    // float minLum = Y (0, 0);
    float maxLum = Y (0, 0);

+#ifdef _OPENMP
    #pragma omp parallel for reduction(max:maxLum) if(multithread)
+#endif

    for ( int i = 0 ; i < size ; i++ ) {
        maxLum = std::max (maxLum, Y (i));
@ -464,13 +476,17 @@ void tmo_fattal02 (size_t width,
    Array2Df* H = new Array2Df (width, height);
    float temp = 100.f / maxLum;
    float eps = 1e-4f;
+#ifdef _OPENMP
    #pragma omp parallel if(multithread)
+#endif
    {
 #ifdef __SSE2__
        vfloat epsv = F2V (eps);
        vfloat tempv = F2V (temp);
 #endif
+#ifdef _OPENMP
        #pragma omp for schedule(dynamic,16)
+#endif

        for (size_t i = 0 ; i < height ; ++i) {
            size_t j = 0;
@ -573,7 +589,9 @@ void tmo_fattal02 (size_t width,
    // boundary conditions, so we need to adjust the assembly of the right hand
    // side accordingly (basically fft solver assumes U(-1) = U(1), whereas zero
    // Neumann conditions assume U(-1)=U(0)), see also divergence calculation
+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for ( size_t y = 0 ; y < height ; y++ ) {
        // sets index+1 based on the boundary assumption H(N+1)=H(N-1)
@ -591,7 +609,9 @@ void tmo_fattal02 (size_t width,
    delete H;

    // calculate divergence
+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for ( size_t y = 0; y < height; ++y ) {
        for ( size_t x = 0; x < width; ++x ) {
@ -626,12 +646,16 @@ void tmo_fattal02 (size_t width,
    delete Gx;
    delete FI;

+#ifdef _OPENMP
    #pragma omp parallel if(multithread)
+#endif
    {
 #ifdef __SSE2__
        vfloat gammav = F2V (gamma);
 #endif
+#ifdef _OPENMP
        #pragma omp for schedule(dynamic,16)
+#endif

        for (size_t i = 0 ; i < height ; i++) {
            size_t j = 0;
@ -706,7 +730,9 @@ void transform_ev2normal (Array2Df *A, Array2Df *T, bool multithread)

    // the discrete cosine transform is not exactly the transform needed
    // need to scale input values to get the right transformation
+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for (int y = 1 ; y < height - 1 ; y++ )
        for (int x = 1 ; x < width - 1 ; x++ ) {
@ -757,7 +783,9 @@ void transform_normal2ev (Array2Df *A, Array2Df *T, bool multithread)

    // need to scale the output matrix to get the right transform
    float factor = (1.0f / ((height - 1) * (width - 1)));
+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for (int y = 0 ; y < height ; y++ )
        for (int x = 0 ; x < width ; x++ ) {
@ -876,7 +904,9 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
    std::vector<double> l1 = get_lambda (height);
    std::vector<double> l2 = get_lambda (width);

+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for (int y = 0 ; y < height ; y++ ) {
        for (int x = 0 ; x < width ; x++ ) {
@ -896,13 +926,17 @@ void solve_pde_fft (Array2Df *F, Array2Df *U, Array2Df *buf, bool multithread)/*
    // (not really needed but good for numerics as we later take exp(U))
    //DEBUG_STR << "solve_pde_fft: removing constant from solution" << std::endl;
    float max = 0.f;
+#ifdef _OPENMP
    #pragma omp parallel for reduction(max:max) if(multithread)
+#endif

    for (int i = 0; i < width * height; i++) {
        max = std::max (max, (*U) (i));
    }

+#ifdef _OPENMP
    #pragma omp parallel for if(multithread)
+#endif

    for (int i = 0; i < width * height; i++) {
        (*U) (i) -= max;
--- a/rtgui/cropwindow.cc
+++ b/rtgui/cropwindow.cc
@ -1493,7 +1493,9 @@ void CropWindow::expose (Cairo::RefPtr<Cairo::Context> cr)
                            }
                        }

+#ifdef _OPENMP
                        #pragma omp critical
+#endif
                        {
                            if(maxthrstdDev_L2 > maxstdDev_L2) {
                                maxstdDev_L2 = maxthrstdDev_L2;