Fixed multiprocessor sharpening bug

see issue 1454
This commit is contained in:
Oliver Duis
2012-07-01 22:20:09 +02:00
parent d47bab4380
commit 33f565a1ba
9 changed files with 132 additions and 68 deletions

View File

@@ -56,16 +56,15 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
#pragma omp parallel
#endif
{
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(src->W,src->H));
gaussHorizontal<float> (src->a, tmp1->a, buffer, src->W, src->H, radius, multiThread);
gaussHorizontal<float> (src->b, tmp1->b, buffer, src->W, src->H, radius, multiThread);
gaussVertical<float> (tmp1->a, tmp1->a, buffer, src->W, src->H, radius, multiThread);
gaussVertical<float> (tmp1->b, tmp1->b, buffer, src->W, src->H, radius, multiThread);
AlignedBufferMP<double> buffer(max(src->W,src->H));
gaussHorizontal<float> (src->L, tmp1->L, buffer, src->W, src->H, radius, multiThread);
gaussVertical<float> (tmp1->L, tmp1->L, buffer, src->W, src->H, radius, multiThread);
gaussHorizontal<float> (src->a, tmp1->a, buffer, src->W, src->H, radius);
gaussHorizontal<float> (src->b, tmp1->b, buffer, src->W, src->H, radius);
gaussVertical<float> (tmp1->a, tmp1->a, buffer, src->W, src->H, radius);
gaussVertical<float> (tmp1->b, tmp1->b, buffer, src->W, src->H, radius);
delete buffer;
gaussHorizontal<float> (src->L, tmp1->L, buffer, src->W, src->H, radius);
gaussVertical<float> (tmp1->L, tmp1->L, buffer, src->W, src->H, radius);
}
//#ifdef _OPENMP

View File

@@ -1,7 +1,7 @@
/*
* This file is part of RawTherapee.
*
* Copyright (c) 2004-2010 Gabor Horvath <hgabor@rawtherapee.com>
* Copyright (c) 2004-2012 Gabor Horvath <hgabor@rawtherapee.com>, Oliver Duis <oduis@oliverduis.de>
*
* RawTherapee is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,7 +19,10 @@
#ifndef _ALIGNEDBUFFER_
#define _ALIGNEDBUFFER_
#include <stdint.h>
#include <vector>
#include <glibmm.h>
// Aligned buffer that should be faster
template <class T> class AlignedBuffer {
private:
@@ -27,10 +30,12 @@ template <class T> class AlignedBuffer {
public:
T* data ;
bool inUse;
AlignedBuffer (size_t size, size_t align=16) {
real = new T[size+2*align];
data = (T*)((uintptr_t)real + (align-((uintptr_t)real)%align));
inUse=true;
}
~AlignedBuffer () {
@@ -38,4 +43,44 @@ template <class T> class AlignedBuffer {
}
};
// Multi processor version, use with OpenMP.
//
// A growable pool of AlignedBuffer<T> objects that all share one element
// count. A worker calls acquire() to get a buffer for its exclusive use and
// hands it back with release() when done. Both calls take the pool mutex, so
// they may be issued concurrently from several threads. Buffers are only
// freed when the pool itself is destroyed.
template <class T> class AlignedBufferMP {
private:
    Glib::Mutex mtx;                         // locked by acquire() and release()
    std::vector<AlignedBuffer<T>*> buffers;  // every buffer created so far; deleted in the destructor
    size_t size;                             // element count passed to each new AlignedBuffer

public:
    // sizeP: number of T elements each pooled buffer is created with.
    // No buffer is allocated until the first acquire().
    AlignedBufferMP(size_t sizeP) {
        size=sizeP;
    }

    // Deletes all pooled buffers, whether or not they are still marked in use.
    ~AlignedBufferMP() {
        for (size_t i=0;i<buffers.size();i++) delete buffers[i];
    }

    // Returns a buffer marked as in use: a free one from the pool if there is
    // one, otherwise a newly allocated buffer that is added to the pool.
    // The pool keeps ownership; pass the pointer back to release(), never delete it.
    AlignedBuffer<T>* acquire() {
        Glib::Mutex::Lock lock(mtx);

        // Find available buffer.
        // The index must start at 0: an uninitialized index would begin the
        // scan at an arbitrary position (skipping free buffers or reading
        // outside the vector).
        for (size_t i=0;i<buffers.size();i++) {
            if (!buffers[i]->inUse) {
                buffers[i]->inUse=true;
                return buffers[i];
            }
        }

        // Add new buffer if nothing is free.
        // The AlignedBuffer constructor already sets inUse=true.
        AlignedBuffer<T>* buffer=new AlignedBuffer<T>(size);
        buffers.push_back(buffer);

        return buffer;
    }

    // Marks a buffer previously returned by acquire() as free for reuse.
    void release(AlignedBuffer<T>* buffer) {
        Glib::Mutex::Lock lock(mtx);

        buffer->inUse=false;
    }
};
#endif

View File

@@ -26,45 +26,55 @@
#ifdef _OPENMP
#include <omp.h>
#endif
#include <windows.h>
#include <stdio.h>
// classical filtering if the support window is small:
template<class T> void gaussHorizontal3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) {
template<class T> void gaussHorizontal3 (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, const float c0, const float c1) {
#ifdef _OPENMP
#pragma omp for
#endif
for (int i=0; i<H; i++) {
T* temp = buffer;
AlignedBuffer<double>* pBuf = buffer.acquire();
T* temp=(T*)pBuf->data;
for (int j=1; j<W-1; j++)
temp[j] = (T)(c1 * (src[i][j-1] + src[i][j+1]) + c0 * src[i][j]);
dst[i][0] = src[i][0];
memcpy (dst[i]+1, temp+1, (W-2)*sizeof(T));
buffer.release(pBuf);
dst[i][W-1] = src[i][W-1];
}
}
template<class T> void gaussVertical3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) {
template<class T> void gaussVertical3 (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, const float c0, const float c1) {
//#pragma omp parallel for if (multiThread)
#ifdef _OPENMP
#pragma omp for
#endif
for (int i=0; i<W; i++) {
T* temp = buffer;
AlignedBuffer<double>* pBuf = buffer.acquire();
T* temp = (T*)pBuf->data;
for (int j = 1; j<H-1; j++)
temp[j] = (T)(c1 * (src[j-1][i] + src[j+1][i]) + c0 * src[j][i]);
dst[0][i] = src[0][i];
for (int j=1; j<H-1; j++)
dst[j][i] = temp[j];
buffer.release(pBuf);
dst[H-1][i] = src[H-1][i];
}
}
// fast gaussian approximation if the support window is large
template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>* buffer, int W, int H, double sigma, bool multiThread) {
template<class T> void gaussHorizontal (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, double sigma) {
if (sigma<0.25) {
// dont perform filtering
@@ -81,7 +91,7 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
double csum = 2.0 * c1 + 1.0;
c1 /= csum;
double c0 = 1.0 / csum;
gaussHorizontal3<T> (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread);
gaussHorizontal3<T> (src, dst, buffer, W, H, c0, c1);
return;
}
@@ -113,10 +123,12 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
for (int i=0; i<3; i++)
for (int j=0; j<3; j++)
M[i][j] /= (1.0+b1-b2+b3)*(1.0+b2+(b1-b3)*b3);
// if (multiThread)
#pragma omp for
for (int i=0; i<H; i++) {
double* temp2 = buffer->data;
AlignedBuffer<double>* pBuf = buffer.acquire();
double* temp2 = pBuf->data;
temp2[0] = B * src[i][0] + b1*src[i][0] + b2*src[i][0] + b3*src[i][0];
temp2[1] = B * src[i][1] + b1*temp2[0] + b2*src[i][0] + b3*src[i][0];
temp2[2] = B * src[i][2] + b1*temp2[1] + b2*temp2[0] + b3*src[i][0];
@@ -136,10 +148,13 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
temp2[j] = B * temp2[j] + b1*temp2[j+1] + b2*temp2[j+2] + b3*temp2[j+3];
for (int j=0; j<W; j++)
dst[i][j] = (T)temp2[j];
}
buffer.release(pBuf);
}
template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* buffer, int W, int H, double sigma, bool multiThread) {
}
template<class T> void gaussVertical (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, double sigma) {
if (sigma<0.25) {
// dont perform filtering
@@ -156,7 +171,7 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
double csum = 2.0 * c1 + 1.0;
c1 /= csum;
double c0 = 1.0 / csum;
gaussVertical3<T> (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread);
gaussVertical3<T> (src, dst, buffer, W, H, c0, c1);
return;
}
@@ -192,7 +207,8 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
#pragma omp for
#endif
for (int i=0; i<W; i++) {
double* temp2 = buffer->data;
AlignedBuffer<double>* pBuf = buffer.acquire();
double* temp2 = pBuf->data;
temp2[0] = B * src[0][i] + b1*src[0][i] + b2*src[0][i] + b3*src[0][i];
temp2[1] = B * src[1][i] + b1*temp2[0] + b2*src[0][i] + b3*src[0][i];
temp2[2] = B * src[2][i] + b1*temp2[1] + b2*temp2[0] + b3*src[0][i];
@@ -213,15 +229,9 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
for (int j=0; j<H; j++)
dst[j][i] = (T)temp2[j];
buffer.release(pBuf);
}
}
/*
void gaussHorizontal_unsigned (unsigned short** src, unsigned short** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
void gaussVertical_unsigned (unsigned short** src, unsigned short** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
void gaussHorizontal_signed (short** src, short** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
void gaussVertical_signed (short** src, short** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
void gaussHorizontal_float (float** src, float** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
void gaussVertical_float (float** src, float** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
*/
#endif

View File

@@ -23,6 +23,7 @@
#include "rtengine.h"
#include "mytime.h"
#include "iccstore.h"
#include "alignedbuffer.h"
using namespace rtengine;
@@ -271,22 +272,28 @@ void Imagefloat::calcCroppedHistogram(const ProcParams &params, float scale, LUT
// Parallelized transformation; create transform with cmsFLAGS_NOCACHE!
void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) {
AlignedBufferMP<float> bufMP(width*3);
// LittleCMS cannot parallelize planar setups
// so build temporary buffers to allow multi processor execution
#pragma omp parallel for
for (int y=0; y<height; y++) {
float buffer[width*3];
float *p=buffer, *pR=r[y], *pG=g[y], *pB=b[y];
AlignedBuffer<float>* pBuf=bufMP.acquire();
float *p=pBuf->data, *pR=r[y], *pG=g[y], *pB=b[y];
for (int x=0; x<width; x++) {
*(p++) = *(pR++); *(p++) = *(pG++); *(p++) = *(pB++);
}
cmsDoTransform (hTransform, buffer, buffer, width);
cmsDoTransform (hTransform, pBuf->data, pBuf->data, width);
p=buffer; pR=r[y]; pG=g[y]; pB=b[y];
p=pBuf->data; pR=r[y]; pG=g[y]; pB=b[y];
for (int x=0; x<width; x++) {
*(pR++) = *(p++); *(pG++) = *(p++); *(pB++) = *(p++);
}
bufMP.release(pBuf);
}
}

View File

@@ -62,14 +62,15 @@ void ImProcFunctions::impulse_nr (LabImage* lab, double thresh) {
int i1, j1;
//rangeblur<unsigned short, unsigned int> (lab->L, lpf, impish /*used as buffer here*/, width, height, thresh, false);
#ifdef _OPENMP
#pragma omp parallel
#endif
{
AlignedBufferMP<double> buffer(max(width,height));
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(width,height));
gaussHorizontal<float> (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0), false /*multiThread*/);
gaussVertical<float> (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0), false);
delete buffer;
gaussHorizontal<float> (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0));
gaussVertical<float> (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0));
}
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

View File

@@ -26,6 +26,7 @@
#include "../rtgui/options.h"
#include "settings.h"
#include "curves.h"
#include "alignedbuffer.h"
#ifdef _OPENMP
@@ -49,12 +50,14 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) {
//gamutmap(lab);
if (monitorTransform) {
AlignedBufferMP<unsigned short> bufferMP(3*lab->W);
// cmsDoTransform is relatively expensive
#pragma omp parallel for
for (int i=0; i<lab->H; i++) {
// pre-conversion to integer, since the output is 8 bit anyway, but LCMS is MUCH faster not converting from float
unsigned short buffer[3*lab->W];
AlignedBuffer<unsigned short>* pBuf=bufferMP.acquire();
unsigned short * buffer=pBuf->data;
const int ix = i * 3 * lab->W;
int iy = 0;
@@ -81,6 +84,8 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) {
}
cmsDoTransform (monitorTransform, buffer, image->data + ix, lab->W);
bufferMP.release(pBuf);
}
} else {

View File

@@ -80,15 +80,15 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
#pragma omp parallel
#endif
{
AlignedBufferMP<double> buffer(max(W,H));
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
float damping = params->sharpening.deconvdamping / 5.0;
bool needdamp = params->sharpening.deconvdamping > 0;
for (int k=0; k<params->sharpening.deconviter; k++) {
// apply blur function (gaussian blur)
gaussHorizontal<float> (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
gaussHorizontal<float> (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
if (!needdamp) {
#ifdef _OPENMP
@@ -102,8 +102,8 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
else
dcdamping (tmp, lab->L, damping, W, H);
gaussHorizontal<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
gaussHorizontal<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
#ifdef _OPENMP
#pragma omp for
@@ -112,7 +112,6 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
for (int j=0; j<W; j++)
tmpI[i][j] = tmpI[i][j] * tmp[i][j];
} // end for
delete buffer;
float p2 = params->sharpening.deconvamount / 100.0;
float p1 = 1.0 - p2;
@@ -155,18 +154,17 @@ void ImProcFunctions::sharpening (LabImage* lab, float** b2) {
{
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
AlignedBufferMP<double> buffer(max(W,H));
if (params->sharpening.edgesonly==false) {
gaussHorizontal<float> (lab->L, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
gaussHorizontal<float> (lab->L, b2, buffer, W, H, params->sharpening.radius / scale);
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale);
}
else {
bilateral<float, float> (lab->L, (float**)b3, b2, W, H, params->sharpening.edges_radius / scale, params->sharpening.edges_tolerance, multiThread);
gaussHorizontal<float> (b3, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
gaussHorizontal<float> (b3, b2, buffer, W, H, params->sharpening.radius / scale);
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale);
}
delete buffer;
float** base = lab->L;
if (params->sharpening.edgesonly)

View File

@@ -44,23 +44,22 @@ SHMap::~SHMap () {
}
void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int skip) {
// fill with luminance
#pragma omp parallel for
for (int i=0; i<H; i++)
for (int j=0; j<W; j++) {
map[i][j] = lumi[0]*std::max(img->r[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f);
}
#ifdef _OPENMP
#pragma omp parallel
#endif
{
if (!hq) {
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
gaussHorizontal<float> (map, map, buffer, W, H, radius, multiThread);
gaussVertical<float> (map, map, buffer, W, H, radius, multiThread);
// fill with luminance
#pragma omp for
for (int i=0; i<H; i++)
for (int j=0; j<W; j++) {
map[i][j] = lumi[0]*std::max(img->r[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f);
}
delete buffer;
if (!hq) {
AlignedBufferMP<double>* pBuffer = new AlignedBufferMP<double> (max(W,H));
gaussHorizontal<float> (map, map, *pBuffer, W, H, radius);
gaussVertical<float> (map, map, *pBuffer, W, H, radius);
delete pBuffer;
}
else {
/*

View File

@@ -54,7 +54,7 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p
}
procparams::ProcParams& params = job->pparams;
// aquire image from imagesource
// acquire image from imagesource
ImageSource* imgsrc = ii->getImageSource ();
int tr = TR_NONE;