Fixed multiprocessor sharpening bug
see issue 1454
This commit is contained in:
@@ -56,16 +56,15 @@ void ImProcFunctions::PF_correct_RT(LabImage * src, LabImage * dst, double radiu
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(src->W,src->H));
|
||||
gaussHorizontal<float> (src->a, tmp1->a, buffer, src->W, src->H, radius, multiThread);
|
||||
gaussHorizontal<float> (src->b, tmp1->b, buffer, src->W, src->H, radius, multiThread);
|
||||
gaussVertical<float> (tmp1->a, tmp1->a, buffer, src->W, src->H, radius, multiThread);
|
||||
gaussVertical<float> (tmp1->b, tmp1->b, buffer, src->W, src->H, radius, multiThread);
|
||||
AlignedBufferMP<double> buffer(max(src->W,src->H));
|
||||
|
||||
gaussHorizontal<float> (src->L, tmp1->L, buffer, src->W, src->H, radius, multiThread);
|
||||
gaussVertical<float> (tmp1->L, tmp1->L, buffer, src->W, src->H, radius, multiThread);
|
||||
gaussHorizontal<float> (src->a, tmp1->a, buffer, src->W, src->H, radius);
|
||||
gaussHorizontal<float> (src->b, tmp1->b, buffer, src->W, src->H, radius);
|
||||
gaussVertical<float> (tmp1->a, tmp1->a, buffer, src->W, src->H, radius);
|
||||
gaussVertical<float> (tmp1->b, tmp1->b, buffer, src->W, src->H, radius);
|
||||
|
||||
delete buffer;
|
||||
gaussHorizontal<float> (src->L, tmp1->L, buffer, src->W, src->H, radius);
|
||||
gaussVertical<float> (tmp1->L, tmp1->L, buffer, src->W, src->H, radius);
|
||||
}
|
||||
|
||||
//#ifdef _OPENMP
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* This file is part of RawTherapee.
|
||||
*
|
||||
* Copyright (c) 2004-2010 Gabor Horvath <hgabor@rawtherapee.com>
|
||||
* Copyright (c) 2004-2012 Gabor Horvath <hgabor@rawtherapee.com>, Oliver Duis <oduis@oliverduis.de>
|
||||
*
|
||||
* RawTherapee is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
@@ -19,7 +19,10 @@
|
||||
#ifndef _ALIGNEDBUFFER_
|
||||
#define _ALIGNEDBUFFER_
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include <glibmm.h>
|
||||
|
||||
// Aligned buffer that should be faster
|
||||
template <class T> class AlignedBuffer {
|
||||
|
||||
private:
|
||||
@@ -27,10 +30,12 @@ template <class T> class AlignedBuffer {
|
||||
|
||||
public:
|
||||
T* data ;
|
||||
bool inUse;
|
||||
|
||||
AlignedBuffer (size_t size, size_t align=16) {
|
||||
real = new T[size+2*align];
|
||||
data = (T*)((uintptr_t)real + (align-((uintptr_t)real)%align));
|
||||
inUse=true;
|
||||
}
|
||||
|
||||
~AlignedBuffer () {
|
||||
@@ -38,4 +43,44 @@ template <class T> class AlignedBuffer {
|
||||
}
|
||||
};
|
||||
|
||||
// Multi processor version, use with OpenMP
|
||||
template <class T> class AlignedBufferMP {
|
||||
private:
|
||||
Glib::Mutex mtx;
|
||||
std::vector<AlignedBuffer<T>*> buffers;
|
||||
size_t size;
|
||||
|
||||
public:
|
||||
AlignedBufferMP(size_t sizeP) {
|
||||
size=sizeP;
|
||||
}
|
||||
|
||||
~AlignedBufferMP() {
|
||||
for (int i=0;i<buffers.size();i++) delete buffers[i];
|
||||
}
|
||||
|
||||
AlignedBuffer<T>* acquire() {
|
||||
Glib::Mutex::Lock lock(mtx);
|
||||
|
||||
// Find available buffer
|
||||
for (int i;i<buffers.size();i++) {
|
||||
if (!buffers[i]->inUse) {
|
||||
buffers[i]->inUse=true;
|
||||
return buffers[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Add new buffer if nothing is free
|
||||
AlignedBuffer<T>* buffer=new AlignedBuffer<T>(size);
|
||||
buffers.push_back(buffer);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void release(AlignedBuffer<T>* buffer) {
|
||||
Glib::Mutex::Lock lock(mtx);
|
||||
|
||||
buffer->inUse=false;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
@@ -26,45 +26,55 @@
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// classical filtering if the support window is small:
|
||||
|
||||
template<class T> void gaussHorizontal3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) {
|
||||
|
||||
template<class T> void gaussHorizontal3 (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, const float c0, const float c1) {
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i=0; i<H; i++) {
|
||||
T* temp = buffer;
|
||||
AlignedBuffer<double>* pBuf = buffer.acquire();
|
||||
T* temp=(T*)pBuf->data;
|
||||
|
||||
for (int j=1; j<W-1; j++)
|
||||
temp[j] = (T)(c1 * (src[i][j-1] + src[i][j+1]) + c0 * src[i][j]);
|
||||
dst[i][0] = src[i][0];
|
||||
memcpy (dst[i]+1, temp+1, (W-2)*sizeof(T));
|
||||
|
||||
buffer.release(pBuf);
|
||||
|
||||
dst[i][W-1] = src[i][W-1];
|
||||
}
|
||||
}
|
||||
|
||||
template<class T> void gaussVertical3 (T** src, T** dst, T* buffer, int W, int H, const float c0, const float c1, bool multiThread) {
|
||||
template<class T> void gaussVertical3 (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, const float c0, const float c1) {
|
||||
|
||||
//#pragma omp parallel for if (multiThread)
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i=0; i<W; i++) {
|
||||
T* temp = buffer;
|
||||
AlignedBuffer<double>* pBuf = buffer.acquire();
|
||||
T* temp = (T*)pBuf->data;
|
||||
|
||||
for (int j = 1; j<H-1; j++)
|
||||
temp[j] = (T)(c1 * (src[j-1][i] + src[j+1][i]) + c0 * src[j][i]);
|
||||
dst[0][i] = src[0][i];
|
||||
for (int j=1; j<H-1; j++)
|
||||
dst[j][i] = temp[j];
|
||||
|
||||
buffer.release(pBuf);
|
||||
|
||||
dst[H-1][i] = src[H-1][i];
|
||||
}
|
||||
}
|
||||
|
||||
// fast gaussian approximation if the support window is large
|
||||
|
||||
template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>* buffer, int W, int H, double sigma, bool multiThread) {
|
||||
template<class T> void gaussHorizontal (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, double sigma) {
|
||||
|
||||
if (sigma<0.25) {
|
||||
// dont perform filtering
|
||||
@@ -81,7 +91,7 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
|
||||
double csum = 2.0 * c1 + 1.0;
|
||||
c1 /= csum;
|
||||
double c0 = 1.0 / csum;
|
||||
gaussHorizontal3<T> (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread);
|
||||
gaussHorizontal3<T> (src, dst, buffer, W, H, c0, c1);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -113,10 +123,12 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
|
||||
for (int i=0; i<3; i++)
|
||||
for (int j=0; j<3; j++)
|
||||
M[i][j] /= (1.0+b1-b2+b3)*(1.0+b2+(b1-b3)*b3);
|
||||
// if (multiThread)
|
||||
|
||||
#pragma omp for
|
||||
for (int i=0; i<H; i++) {
|
||||
double* temp2 = buffer->data;
|
||||
AlignedBuffer<double>* pBuf = buffer.acquire();
|
||||
double* temp2 = pBuf->data;
|
||||
|
||||
temp2[0] = B * src[i][0] + b1*src[i][0] + b2*src[i][0] + b3*src[i][0];
|
||||
temp2[1] = B * src[i][1] + b1*temp2[0] + b2*src[i][0] + b3*src[i][0];
|
||||
temp2[2] = B * src[i][2] + b1*temp2[1] + b2*temp2[0] + b3*src[i][0];
|
||||
@@ -136,10 +148,13 @@ template<class T> void gaussHorizontal (T** src, T** dst, AlignedBuffer<double>*
|
||||
temp2[j] = B * temp2[j] + b1*temp2[j+1] + b2*temp2[j+2] + b3*temp2[j+3];
|
||||
for (int j=0; j<W; j++)
|
||||
dst[i][j] = (T)temp2[j];
|
||||
}
|
||||
|
||||
buffer.release(pBuf);
|
||||
}
|
||||
|
||||
template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* buffer, int W, int H, double sigma, bool multiThread) {
|
||||
}
|
||||
|
||||
template<class T> void gaussVertical (T** src, T** dst, AlignedBufferMP<double> &buffer, int W, int H, double sigma) {
|
||||
|
||||
if (sigma<0.25) {
|
||||
// dont perform filtering
|
||||
@@ -156,7 +171,7 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
|
||||
double csum = 2.0 * c1 + 1.0;
|
||||
c1 /= csum;
|
||||
double c0 = 1.0 / csum;
|
||||
gaussVertical3<T> (src, dst, (T*)(buffer->data), W, H, c0, c1, multiThread);
|
||||
gaussVertical3<T> (src, dst, buffer, W, H, c0, c1);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -192,7 +207,8 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i=0; i<W; i++) {
|
||||
double* temp2 = buffer->data;
|
||||
AlignedBuffer<double>* pBuf = buffer.acquire();
|
||||
double* temp2 = pBuf->data;
|
||||
temp2[0] = B * src[0][i] + b1*src[0][i] + b2*src[0][i] + b3*src[0][i];
|
||||
temp2[1] = B * src[1][i] + b1*temp2[0] + b2*src[0][i] + b3*src[0][i];
|
||||
temp2[2] = B * src[2][i] + b1*temp2[1] + b2*temp2[0] + b3*src[0][i];
|
||||
@@ -213,15 +229,9 @@ template<class T> void gaussVertical (T** src, T** dst, AlignedBuffer<double>* b
|
||||
|
||||
for (int j=0; j<H; j++)
|
||||
dst[j][i] = (T)temp2[j];
|
||||
|
||||
buffer.release(pBuf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
void gaussHorizontal_unsigned (unsigned short** src, unsigned short** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
|
||||
void gaussVertical_unsigned (unsigned short** src, unsigned short** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
|
||||
void gaussHorizontal_signed (short** src, short** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
|
||||
void gaussVertical_signed (short** src, short** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
|
||||
void gaussHorizontal_float (float** src, float** dst, AlignedBuffer<double>* buffer, int W, int row_from, int row_to, double sigma);
|
||||
void gaussVertical_float (float** src, float** dst, AlignedBuffer<double>* buffer, int H, int col_from, int col_to, double sigma);
|
||||
*/
|
||||
#endif
|
||||
|
@@ -23,6 +23,7 @@
|
||||
#include "rtengine.h"
|
||||
#include "mytime.h"
|
||||
#include "iccstore.h"
|
||||
#include "alignedbuffer.h"
|
||||
|
||||
using namespace rtengine;
|
||||
|
||||
@@ -271,22 +272,28 @@ void Imagefloat::calcCroppedHistogram(const ProcParams ¶ms, float scale, LUT
|
||||
|
||||
// Parallelized transformation; create transform with cmsFLAGS_NOCACHE!
|
||||
void Imagefloat::ExecCMSTransform(cmsHTRANSFORM hTransform) {
|
||||
|
||||
AlignedBufferMP<float> bufMP(width*3);
|
||||
|
||||
// LittleCMS cannot parallize planar setups
|
||||
// so build temporary buffers to allow multi processor execution
|
||||
#pragma omp parallel for
|
||||
for (int y=0; y<height; y++) {
|
||||
float buffer[width*3];
|
||||
float *p=buffer, *pR=r[y], *pG=g[y], *pB=b[y];
|
||||
AlignedBuffer<float>* pBuf=bufMP.acquire();
|
||||
|
||||
float *p=pBuf->data, *pR=r[y], *pG=g[y], *pB=b[y];
|
||||
|
||||
for (int x=0; x<width; x++) {
|
||||
*(p++) = *(pR++); *(p++) = *(pG++); *(p++) = *(pB++);
|
||||
}
|
||||
|
||||
cmsDoTransform (hTransform, buffer, buffer, width);
|
||||
cmsDoTransform (hTransform, pBuf->data, pBuf->data, width);
|
||||
|
||||
p=buffer; pR=r[y]; pG=g[y]; pB=b[y];
|
||||
p=pBuf->data; pR=r[y]; pG=g[y]; pB=b[y];
|
||||
for (int x=0; x<width; x++) {
|
||||
*(pR++) = *(p++); *(pG++) = *(p++); *(pB++) = *(p++);
|
||||
}
|
||||
|
||||
bufMP.release(pBuf);
|
||||
}
|
||||
}
|
||||
|
@@ -62,14 +62,15 @@ void ImProcFunctions::impulse_nr (LabImage* lab, double thresh) {
|
||||
int i1, j1;
|
||||
|
||||
//rangeblur<unsigned short, unsigned int> (lab->L, lpf, impish /*used as buffer here*/, width, height, thresh, false);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
AlignedBufferMP<double> buffer(max(width,height));
|
||||
|
||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(width,height));
|
||||
|
||||
gaussHorizontal<float> (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0), false /*multiThread*/);
|
||||
gaussVertical<float> (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0), false);
|
||||
|
||||
delete buffer;
|
||||
|
||||
gaussHorizontal<float> (lab->L, lpf, buffer, width, height, max(2.0,thresh-1.0));
|
||||
gaussVertical<float> (lpf, lpf, buffer, width, height, max(2.0,thresh-1.0));
|
||||
}
|
||||
|
||||
//%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include "../rtgui/options.h"
|
||||
#include "settings.h"
|
||||
#include "curves.h"
|
||||
#include "alignedbuffer.h"
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
@@ -49,12 +50,14 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) {
|
||||
//gamutmap(lab);
|
||||
|
||||
if (monitorTransform) {
|
||||
AlignedBufferMP<unsigned short> bufferMP(3*lab->W);
|
||||
|
||||
// cmsDoTransform is relatively expensive
|
||||
#pragma omp parallel for
|
||||
for (int i=0; i<lab->H; i++) {
|
||||
// pre-conversion to integer, since the output is 8 bit anyway, but LCMS is MUCH faster not converting from float
|
||||
unsigned short buffer[3*lab->W];
|
||||
AlignedBuffer<unsigned short>* pBuf=bufferMP.acquire();
|
||||
unsigned short * buffer=pBuf->data;
|
||||
|
||||
const int ix = i * 3 * lab->W;
|
||||
int iy = 0;
|
||||
@@ -81,6 +84,8 @@ void ImProcFunctions::lab2monitorRgb (LabImage* lab, Image8* image) {
|
||||
}
|
||||
|
||||
cmsDoTransform (monitorTransform, buffer, image->data + ix, lab->W);
|
||||
|
||||
bufferMP.release(pBuf);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@@ -80,15 +80,15 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
AlignedBufferMP<double> buffer(max(W,H));
|
||||
|
||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
|
||||
float damping = params->sharpening.deconvdamping / 5.0;
|
||||
bool needdamp = params->sharpening.deconvdamping > 0;
|
||||
for (int k=0; k<params->sharpening.deconviter; k++) {
|
||||
|
||||
// apply blur function (gaussian blur)
|
||||
gaussHorizontal<float> (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
|
||||
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
|
||||
gaussHorizontal<float> (tmpI, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
|
||||
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
|
||||
|
||||
if (!needdamp) {
|
||||
#ifdef _OPENMP
|
||||
@@ -102,8 +102,8 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
|
||||
else
|
||||
dcdamping (tmp, lab->L, damping, W, H);
|
||||
|
||||
gaussHorizontal<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
|
||||
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale, multiThread);
|
||||
gaussHorizontal<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
|
||||
gaussVertical<float> (tmp, tmp, buffer, W, H, params->sharpening.deconvradius / scale);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
@@ -112,7 +112,6 @@ void ImProcFunctions::deconvsharpening (LabImage* lab, float** b2) {
|
||||
for (int j=0; j<W; j++)
|
||||
tmpI[i][j] = tmpI[i][j] * tmp[i][j];
|
||||
} // end for
|
||||
delete buffer;
|
||||
|
||||
float p2 = params->sharpening.deconvamount / 100.0;
|
||||
float p1 = 1.0 - p2;
|
||||
@@ -155,18 +154,17 @@ void ImProcFunctions::sharpening (LabImage* lab, float** b2) {
|
||||
{
|
||||
|
||||
|
||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
|
||||
AlignedBufferMP<double> buffer(max(W,H));
|
||||
if (params->sharpening.edgesonly==false) {
|
||||
|
||||
gaussHorizontal<float> (lab->L, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
|
||||
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
|
||||
gaussHorizontal<float> (lab->L, b2, buffer, W, H, params->sharpening.radius / scale);
|
||||
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale);
|
||||
}
|
||||
else {
|
||||
bilateral<float, float> (lab->L, (float**)b3, b2, W, H, params->sharpening.edges_radius / scale, params->sharpening.edges_tolerance, multiThread);
|
||||
gaussHorizontal<float> (b3, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
|
||||
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale, multiThread);
|
||||
gaussHorizontal<float> (b3, b2, buffer, W, H, params->sharpening.radius / scale);
|
||||
gaussVertical<float> (b2, b2, buffer, W, H, params->sharpening.radius / scale);
|
||||
}
|
||||
delete buffer;
|
||||
|
||||
float** base = lab->L;
|
||||
if (params->sharpening.edgesonly)
|
||||
|
@@ -44,23 +44,22 @@ SHMap::~SHMap () {
|
||||
}
|
||||
|
||||
void SHMap::update (Imagefloat* img, double radius, double lumi[3], bool hq, int skip) {
|
||||
|
||||
// fill with luminance
|
||||
#pragma omp parallel for
|
||||
for (int i=0; i<H; i++)
|
||||
for (int j=0; j<W; j++) {
|
||||
map[i][j] = lumi[0]*std::max(img->r[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f);
|
||||
}
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
if (!hq) {
|
||||
AlignedBuffer<double>* buffer = new AlignedBuffer<double> (max(W,H));
|
||||
gaussHorizontal<float> (map, map, buffer, W, H, radius, multiThread);
|
||||
gaussVertical<float> (map, map, buffer, W, H, radius, multiThread);
|
||||
// fill with luminance
|
||||
#pragma omp for
|
||||
for (int i=0; i<H; i++)
|
||||
for (int j=0; j<W; j++) {
|
||||
map[i][j] = lumi[0]*std::max(img->r[i][j],0.f) + lumi[1]*std::max(img->g[i][j],0.f) + lumi[2]*std::max(img->b[i][j],0.f);
|
||||
}
|
||||
|
||||
delete buffer;
|
||||
if (!hq) {
|
||||
AlignedBufferMP<double>* pBuffer = new AlignedBufferMP<double> (max(W,H));
|
||||
gaussHorizontal<float> (map, map, *pBuffer, W, H, radius);
|
||||
gaussVertical<float> (map, map, *pBuffer, W, H, radius);
|
||||
delete pBuffer;
|
||||
}
|
||||
else {
|
||||
/*
|
||||
|
@@ -54,7 +54,7 @@ IImage16* processImage (ProcessingJob* pjob, int& errorCode, ProgressListener* p
|
||||
}
|
||||
procparams::ProcParams& params = job->pparams;
|
||||
|
||||
// aquire image from imagesource
|
||||
// acquire image from imagesource
|
||||
ImageSource* imgsrc = ii->getImageSource ();
|
||||
|
||||
int tr = TR_NONE;
|
||||
|
Reference in New Issue
Block a user