From 0c0a585c8c035d92712f1b012f27fb21dba6d813 Mon Sep 17 00:00:00 2001 From: Ingo Date: Mon, 10 Feb 2014 12:24:35 +0100 Subject: [PATCH] Optimization for RawImageSource::getRAWHistogram, Issue 2238 --- rtengine/rawimagesource.cc | 87 ++++++++++++++++++++++---------------- 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/rtengine/rawimagesource.cc b/rtengine/rawimagesource.cc index 19c964ce0..94d897744 100644 --- a/rtengine/rawimagesource.cc +++ b/rtengine/rawimagesource.cc @@ -2452,62 +2452,77 @@ void RawImageSource::getAutoExpHistogram (LUTu & histogram, int& histcompr) { // Histogram MUST be 256 in size; gamma is applied, blackpoint and gain also void RawImageSource::getRAWHistogram (LUTu & histRedRaw, LUTu & histGreenRaw, LUTu & histBlueRaw) { - histRedRaw.clear(); histGreenRaw.clear(); histBlueRaw.clear(); - float mult[4] = { 65535.0 / ri->get_white(0), 65535.0 / ri->get_white(1), 65535.0 / ri->get_white(2), 65535.0 / ri->get_white(3) }; -#pragma omp parallel -{ - LUTu tmphistRedRaw( 256 ); - LUTu tmphistGreenRaw( 256 ); - LUTu tmphistBlueRaw( 256 ); - tmphistRedRaw.clear(); - tmphistGreenRaw.clear(); - tmphistBlueRaw.clear(); + histRedRaw.clear(); histGreenRaw.clear(); histBlueRaw.clear(); + const float mult[4] = { 65535.0 / ri->get_white(0), 65535.0 / ri->get_white(1), 65535.0 / ri->get_white(2), 65535.0 / ri->get_white(3) }; +#ifdef _OPENMP + int numThreads; + // reduce the number of threads under certain conditions to avoid overhead of too many critical regions + numThreads = sqrt((((H-2*border)*(W-2*border))/262144.f)); + numThreads = std::min(std::max(numThreads,1), omp_get_max_threads()); + +#pragma omp parallel num_threads(numThreads) +#endif +{ + // we need one LUT per color and thread, which corresponds to 1 MB per thread + LUTu tmphist[4]; + tmphist[0](65536);tmphist[0].clear(); + tmphist[1](65536);tmphist[1].clear(); + tmphist[2](65536);tmphist[2].clear(); + tmphist[3](65536);tmphist[3].clear(); + +#ifdef _OPENMP #pragma omp for nowait 
+#endif for (int i=border; iisBayer()) { - for (int j=start; jdata[i][j]-(cblacksom[c4]/*+black_lev[c4]*/)))); - - switch (c) { - case 0: tmphistRedRaw[idx>>8]++; break; - case 1: tmphistGreenRaw[idx>>8]++; break; - case 2: tmphistBlueRaw[idx>>8]++; break; - } + int j; + int c1 = FC(i,start); + c1 = ( c1 == 1 && !(i&1) ) ? 3 : c1; + int c2 = FC(i,start+1); + c2 = ( c2 == 1 && !(i&1) ) ? 3 : c2; + for (j=start; jdata[i][j]]++; + tmphist[c2][ri->data[i][j+1]]++; + } + if(jdata[i][j]]++; } } else { for (int j=start; jdata[i][3*j+c]-cblacksom[c]))); - - switch (c) { - case 0: tmphistRedRaw[idx>>8]++; break; - case 1: tmphistGreenRaw[idx>>8]++; break; - case 2: tmphistBlueRaw[idx>>8]++; break; - } + tmphist[c][ri->data[i][3*j+c]]++; } } } } +#ifdef _OPENMP #pragma omp critical +#endif { - for(int i=0;i<256;i++){ - histRedRaw[i] += tmphistRedRaw[i]; - histGreenRaw[i] += tmphistGreenRaw[i]; - histBlueRaw[i] += tmphistBlueRaw[i]; + for(int i=0;i<65536;i++){ + int idx; + idx = CLIP((int)Color::gamma(mult[0]*(i-(cblacksom[0]/*+black_lev[0]*/)))); + histRedRaw[idx>>8] += tmphist[0][i]; + idx = CLIP((int)Color::gamma(mult[1]*(i-(cblacksom[1]/*+black_lev[1]*/)))); + histGreenRaw[idx>>8] += tmphist[1][i]; + idx = CLIP((int)Color::gamma(mult[3]*(i-(cblacksom[3]/*+black_lev[3]*/)))); + histGreenRaw[idx>>8] += tmphist[3][i]; + idx = CLIP((int)Color::gamma(mult[2]*(i-(cblacksom[2]/*+black_lev[2]*/)))); + histBlueRaw[idx>>8] += tmphist[2][i]; } -} -} +} // end of critical region +} // end of parallel region // since there are twice as many greens, correct for it - if (ri->isBayer()) for (int i=0;i<256;i++) histGreenRaw[i]>>=1; + if (ri->isBayer()) + for (int i=0;i<256;i++) + histGreenRaw[i]>>=1; + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%