From 33f565a1bacf2a05c1c053b476287eb61965da50 Mon Sep 17 00:00:00 2001 From: Oliver Duis Date: Sun, 1 Jul 2012 22:20:09 +0200 Subject: [PATCH] Fixed multiprocessor sharpening bug see issue 1454 --- rtengine/PF_correct_RT.cc | 15 +++++------ rtengine/alignedbuffer.h | 47 ++++++++++++++++++++++++++++++++- rtengine/gauss.h | 54 ++++++++++++++++++++++---------------- rtengine/imagefloat.cc | 15 ++++++++--- rtengine/impulse_denoise.h | 15 ++++++----- rtengine/iplab2rgb.cc | 7 ++++- rtengine/ipsharpen.cc | 22 +++++++--------- rtengine/shmap.cc | 23 ++++++++-------- rtengine/simpleprocess.cc | 2 +- 9 files changed, 132 insertions(+), 68 deletions(-) diff --git a/rtengine/PF_correct_RT.cc b/rtengine/PF_correct_RT.cc index 3d35f0bf7..ed8f623c7 100644 --- a/rtengine/PF_correct_RT.cc +++ b/rtengine/PF_correct_RT.cc @@ -56,16 +56,15 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu #pragma omp parallel #endif { - AlignedBuffer* buffer = new AlignedBuffer (max(src->W,src->H)); - gaussHorizontal (src->a, tmp1->a, buffer, src->W, src->H, radius, multiThread); - gaussHorizontal (src->b, tmp1->b, buffer, src->W, src->H, radius, multiThread); - gaussVertical (tmp1->a, tmp1->a, buffer, src->W, src->H, radius, multiThread); - gaussVertical (tmp1->b, tmp1->b, buffer, src->W, src->H, radius, multiThread); + AlignedBufferMP buffer(max(src->W,src->H)); - gaussHorizontal (src->L, tmp1->L, buffer, src->W, src->H, radius, multiThread); - gaussVertical (tmp1->L, tmp1->L, buffer, src->W, src->H, radius, multiThread); + gaussHorizontal (src->a, tmp1->a, buffer, src->W, src->H, radius); + gaussHorizontal (src->b, tmp1->b, buffer, src->W, src->H, radius); + gaussVertical (tmp1->a, tmp1->a, buffer, src->W, src->H, radius); + gaussVertical (tmp1->b, tmp1->b, buffer, src->W, src->H, radius); - delete buffer; + gaussHorizontal (src->L, tmp1->L, buffer, src->W, src->H, radius); + gaussVertical (tmp1->L, tmp1->L, buffer, src->W, src->H, radius); } //#ifdef _OPENMP diff 
--git a/rtengine/alignedbuffer.h b/rtengine/alignedbuffer.h index 430b0659b..057b654ad 100644 --- a/rtengine/alignedbuffer.h +++ b/rtengine/alignedbuffer.h @@ -1,7 +1,7 @@ /* * This file is part of RawTherapee. * - * Copyright (c) 2004-2010 Gabor Horvath +* Copyright (c) 2004-2012 Gabor Horvath , Oliver Duis * * RawTherapee is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,7 +19,10 @@ #ifndef _ALIGNEDBUFFER_ #define _ALIGNEDBUFFER_ #include +#include +#include +// Aligned buffer that should be faster template class AlignedBuffer { private: @@ -27,10 +30,12 @@ template class AlignedBuffer { public: T* data ; + bool inUse; AlignedBuffer (size_t size, size_t align=16) { real = new T[size+2*align]; data = (T*)((uintptr_t)real + (align-((uintptr_t)real)%align)); + inUse=true; } ~AlignedBuffer () { @@ -38,4 +43,44 @@ template class AlignedBuffer { } }; +// Multi processor version, use with OpenMP +template class AlignedBufferMP { +private: + Glib::Mutex mtx; + std::vector*> buffers; + size_t size; + +public: + AlignedBufferMP(size_t sizeP) { + size=sizeP; + } + + ~AlignedBufferMP() { + for (int i=0;i* acquire() { + Glib::Mutex::Lock lock(mtx); + + // Find available buffer + for (int i=0;iinUse) { + buffers[i]->inUse=true; + return buffers[i]; + } + } + + // Add new buffer if nothing is free + AlignedBuffer* buffer=new AlignedBuffer(size); + buffers.push_back(buffer); + + return buffer; + } + + void release(AlignedBuffer* buffer) { + Glib::Mutex::Lock lock(mtx); + + buffer->inUse=false; + } +}; #endif diff --git a/rtengine/gauss.h b/rtengine/gauss.h index 725455463..0ff5872fb 100644 --- a/rtengine/gauss.h +++ b/rtengine/gauss.h @@ -26,45 +26,55 @@ #ifdef _OPENMP #include #endif +#include +#include // classical filtering if the support window is small: -template void gaussHorizontal3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) { - +template 
void gaussHorizontal3 (T** src, T** dst, AlignedBufferMP &buffer, int W, int H, const float c0, const float c1) { #ifdef _OPENMP #pragma omp for #endif for (int i=0; i* pBuf = buffer.acquire(); + T* temp=(T*)pBuf->data; + for (int j=1; j void gaussVertical3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) { +template void gaussVertical3 (T** src, T** dst, AlignedBufferMP &buffer, int W, int H, const float c0, const float c1) { - //#pragma omp parallel for if (multiThread) #ifdef _OPENMP #pragma omp for #endif for (int i=0; i* pBuf = buffer.acquire(); + T* temp = (T*)pBuf->data; + for (int j = 1; j void gaussHorizontal (T** src, T** dst, AlignedBuffer* buffer, int W, int H, double sigma, bool multiThread) { +template void gaussHorizontal (T** src, T** dst, AlignedBufferMP &buffer, int W, int H, double sigma) { if (sigma<0.25) { // dont perform filtering @@ -81,7 +91,7 @@ template void gaussHorizontal (T** src, T** dst, AlignedBuffer* double csum = 2.0 * c1 + 1.0; c1 /= csum; double c0 = 1.0 / csum; - gaussHorizontal3 (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread); + gaussHorizontal3 (src, dst, buffer, W, H, c0, c1); return; } @@ -113,10 +123,12 @@ template void gaussHorizontal (T** src, T** dst, AlignedBuffer* for (int i=0; i<3; i++) for (int j=0; j<3; j++) M[i][j] /= (1.0+b1-b2+b3)*(1.0+b2+(b1-b3)*b3); - // if (multiThread) + #pragma omp for for (int i=0; idata; + AlignedBuffer* pBuf = buffer.acquire(); + double* temp2 = pBuf->data; + temp2[0] = B * src[i][0] + b1*src[i][0] + b2*src[i][0] + b3*src[i][0]; temp2[1] = B * src[i][1] + b1*temp2[0] + b2*src[i][0] + b3*src[i][0]; temp2[2] = B * src[i][2] + b1*temp2[1] + b2*temp2[0] + b3*src[i][0]; @@ -136,10 +148,13 @@ template void gaussHorizontal (T** src, T** dst, AlignedBuffer* temp2[j] = B * temp2[j] + b1*temp2[j+1] + b2*temp2[j+2] + b3*temp2[j+3]; for (int j=0; j void gaussVertical (T** src, T** dst, AlignedBuffer* buffer, int W, int H, double sigma, bool 
multiThread) { +} + +template void gaussVertical (T** src, T** dst, AlignedBufferMP &buffer, int W, int H, double sigma) { if (sigma<0.25) { // dont perform filtering @@ -156,7 +171,7 @@ template void gaussVertical (T** src, T** dst, AlignedBuffer* b double csum = 2.0 * c1 + 1.0; c1 /= csum; double c0 = 1.0 / csum; - gaussVertical3 (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread); + gaussVertical3 (src, dst, buffer, W, H, c0, c1); return; } @@ -192,7 +207,8 @@ template void gaussVertical (T** src, T** dst, AlignedBuffer* b #pragma omp for #endif for (int i=0; idata; + AlignedBuffer* pBuf = buffer.acquire(); + double* temp2 = pBuf->data; temp2[0] = B * src[0][i] + b1*src[0][i] + b2*src[0][i] + b3*src[0][i]; temp2[1] = B * src[1][i] + b1*temp2[0] + b2*src[0][i] + b3*src[0][i]; temp2[2] = B * src[2][i] + b1*temp2[1] + b2*temp2[0] + b3*src[0][i]; @@ -213,15 +229,9 @@ template void gaussVertical (T** src, T** dst, AlignedBuffer* b for (int j=0; j* buffer, int W, int row_from, int row_to, double sigma); -void gaussVertical_unsigned (unsigned short** src, unsigned short** dst, AlignedBuffer* buffer, int H, int col_from, int col_to, double sigma); -void gaussHorizontal_signed (short** src, short** dst, AlignedBuffer* buffer, int W, int row_from, int row_to, double sigma); -void gaussVertical_signed (short** src, short** dst, AlignedBuffer* buffer, int H, int col_from, int col_to, double sigma); -void gaussHorizontal_float (float** src, float** dst, AlignedBuffer* buffer, int W, int row_from, int row_to, double sigma); -void gaussVertical_float (float** src, float** dst, AlignedBuffer* buffer, int H, int col_from, int col_to, double sigma); -*/ #endif diff --git a/rtengine/imagefloat.cc b/rtengine/imagefloat.cc index 5cc66f9a9..120cceae0 100644 --- a/rtengine/imagefloat.cc +++ b/rtengine/imagefloat.cc @@ -23,6 +23,7 @@ #include "rtengine.h" #include "mytime.h" #include "iccstore.h" +#include "alignedbuffer.h" using namespace rtengine; @@ -271,22 +272,28 @@ void 
Imagefloat::calcCroppedHistogram(const ProcParams &params, float scale, LUT // Parallized transformation; create transform with cmsFLAGS_NOCACHE! void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) { + + AlignedBufferMP bufMP(width*3); + // LittleCMS cannot parallize planar setups // so build temporary buffers to allow multi processor execution #pragma omp parallel for for (int y=0; y* pBuf=bufMP.acquire(); + + float *p=pBuf->data, *pR=r[y], *pG=g[y], *pB=b[y]; for (int x=0; xdata, pBuf->data, width); - p=buffer; pR=r[y]; pG=g[y]; pB=b[y]; + p=pBuf->data; pR=r[y]; pG=g[y]; pB=b[y]; for (int x=0; x (lab->L, lpf, impish /*used as buffer here*/, width, height, thresh, false); + #ifdef _OPENMP + #pragma omp parallel + #endif + { + AlignedBufferMP buffer(max(width,height)); - AlignedBuffer* buffer = new AlignedBuffer (max(width,height)); - - gaussHorizontal (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0), false /*multiThread*/); - gaussVertical (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0), false); - - delete buffer; - + gaussHorizontal (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0)); + gaussVertical (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0)); + } //%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/rtengine/iplab2rgb.cc b/rtengine/iplab2rgb.cc index 8c89a65b5..fbdad662b 100644 --- a/rtengine/iplab2rgb.cc +++ b/rtengine/iplab2rgb.cc @@ -26,6 +26,7 @@ #include "../rtgui/options.h" #include "settings.h" #include "curves.h" +#include "alignedbuffer.h" #ifdef _OPENMP @@ -49,12 +50,14 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) { //gamutmap(lab); if (monitorTransform) { + AlignedBufferMP bufferMP(3*lab->W); // cmsDoTransform is relatively expensive #pragma omp parallel for for (int i=0; iH; i++) { // pre-conversion to integer, since the output is 8 bit anyway, but LCMS is MUCH faster not converting from float - unsigned short buffer[3*lab->W]; + AlignedBuffer* pBuf=bufferMP.acquire(); + unsigned 
short * buffer=pBuf->data; const int ix = i * 3 * lab->W; int iy = 0; @@ -81,6 +84,8 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) { } cmsDoTransform (monitorTransform, buffer, image->data + ix, lab->W); + + bufferMP.release(pBuf); } } else { diff --git a/rtengine/ipsharpen.cc b/rtengine/ipsharpen.cc index 0e00949c0..46600dbc2 100644 --- a/rtengine/ipsharpen.cc +++ b/rtengine/ipsharpen.cc @@ -80,15 +80,15 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) { #pragma omp parallel #endif { + AlignedBufferMP buffer(max(W,H)); - AlignedBuffer* buffer = new AlignedBuffer (max(W,H)); float damping = params->sharpening.deconvdamping / 5.0; bool needdamp = params->sharpening.deconvdamping > 0; for (int k=0; ksharpening.deconviter; k++) { // apply blur function (gaussian blur) - gaussHorizontal (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread); - gaussVertical (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread); + gaussHorizontal (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale); + gaussVertical (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale); if (!needdamp) { #ifdef _OPENMP @@ -102,8 +102,8 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) { else dcdamping (tmp, lab->L, damping, W, H); - gaussHorizontal (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread); - gaussVertical (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread); + gaussHorizontal (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale); + gaussVertical (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale); #ifdef _OPENMP #pragma omp for @@ -112,7 +112,6 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) { for (int j=0; jsharpening.deconvamount / 100.0; float p1 = 1.0 - p2; @@ -155,18 +154,17 @@ void ImProcFunctions::sharpening (LabImage* lab, float** b2) { { - 
AlignedBuffer* buffer = new AlignedBuffer (max(W,H)); + AlignedBufferMP buffer(max(W,H)); if (params->sharpening.edgesonly==false) { - gaussHorizontal (lab->L, b2, buffer, W, H, params->sharpening.radius / scale, multiThread); - gaussVertical (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread); + gaussHorizontal (lab->L, b2, buffer, W, H, params->sharpening.radius / scale); + gaussVertical (b2, b2, buffer, W, H, params->sharpening.radius / scale); } else { bilateral (lab->L, (float**)b3, b2, W, H, params->sharpening.edges_radius / scale, params->sharpening.edges_tolerance, multiThread); - gaussHorizontal (b3, b2, buffer, W, H, params->sharpening.radius / scale, multiThread); - gaussVertical (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread); + gaussHorizontal (b3, b2, buffer, W, H, params->sharpening.radius / scale); + gaussVertical (b2, b2, buffer, W, H, params->sharpening.radius / scale); } - delete buffer; float** base = lab->L; if (params->sharpening.edgesonly) diff --git a/rtengine/shmap.cc b/rtengine/shmap.cc index 8d60d52e7..28bd1a1da 100644 --- a/rtengine/shmap.cc +++ b/rtengine/shmap.cc @@ -44,23 +44,22 @@ SHMap::~SHMap () { } void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int skip) { - - // fill with luminance - #pragma omp parallel for - for (int i=0; ir[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f); - } #ifdef _OPENMP #pragma omp parallel #endif { - if (!hq) { - AlignedBuffer* buffer = new AlignedBuffer (max(W,H)); - gaussHorizontal (map, map, buffer, W, H, radius, multiThread); - gaussVertical (map, map, buffer, W, H, radius, multiThread); + // fill with luminance + #pragma omp for + for (int i=0; ir[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f); + } - delete buffer; + if (!hq) { + AlignedBufferMP* pBuffer = new AlignedBufferMP (max(W,H)); + gaussHorizontal (map, map, *pBuffer, W, H, radius); + gaussVertical 
(map, map, *pBuffer, W, H, radius); + delete pBuffer; } else { /* diff --git a/rtengine/simpleprocess.cc b/rtengine/simpleprocess.cc index 270fa3b22..24572902c 100644 --- a/rtengine/simpleprocess.cc +++ b/rtengine/simpleprocess.cc @@ -54,7 +54,7 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p } procparams::ProcParams& params = job->pparams; - // aquire image from imagesource + // acquire image from imagesource ImageSource* imgsrc = ii->getImageSource (); int tr = TR_NONE;