locallab: speedup and a bit of cleanup for local sharpening
This commit is contained in:
@@ -31,6 +31,7 @@
|
||||
#include "iccmatrices.h"
|
||||
#include "color.h"
|
||||
#include "rt_math.h"
|
||||
#include "jaggedarray.h"
|
||||
#ifdef _DEBUG
|
||||
#include "mytime.h"
|
||||
#endif
|
||||
@@ -1886,7 +1887,6 @@ void ImProcFunctions::Contrast_Local (int call, float ave, LabImage * bufcontori
|
||||
// contrast - perhaps for 4 areas if need
|
||||
// I tried shmap adaptaed to Lab, but no real gain and artifacts
|
||||
const float localtype = lumaref; // always spot area
|
||||
// const float localtype = ave; // always spot area
|
||||
const float ach = (float)lp.trans / 100.f;
|
||||
float reducac;
|
||||
|
||||
@@ -1933,7 +1933,6 @@ void ImProcFunctions::Contrast_Local (int call, float ave, LabImage * bufcontori
|
||||
float minco = +10000.f;
|
||||
|
||||
if (call <= 3) {
|
||||
std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
@@ -1949,11 +1948,8 @@ std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
#pragma omp for schedule(dynamic,16)
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < transformed->H; y++)
|
||||
{
|
||||
|
||||
for (int y = 0; y < transformed->H; y++) {
|
||||
const int loy = cy + y;
|
||||
|
||||
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
|
||||
|
||||
if(isZone0) { // outside selection and outside transition zone => no effect, keep original values
|
||||
@@ -1978,14 +1974,12 @@ std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
#endif
|
||||
|
||||
for (int x = 0; x < transformed->W; x++) {
|
||||
int lox = cx + x;
|
||||
const int lox = cx + x;
|
||||
|
||||
float rL;
|
||||
|
||||
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && (rL = original->L[y][x]) > 3.2768f) {
|
||||
// rL > 3.2768f to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
|
||||
int begx = lp.xc - lp.lxL;
|
||||
int begy = lp.yc - lp.lyT;
|
||||
int zone = 0;
|
||||
|
||||
float localFactor = 1.f;
|
||||
@@ -2008,6 +2002,8 @@ std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
|
||||
float cli = 1.f;
|
||||
|
||||
const int begx = lp.xc - lp.lxL;
|
||||
const int begy = lp.yc - lp.lyT;
|
||||
if (lp.curvact) {
|
||||
|
||||
cli = (buflightc[loy - begy][lox - begx]);
|
||||
@@ -2119,7 +2115,6 @@ std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
|
||||
if (rchro < kcr) {
|
||||
fach *= SQR(rchro) / SQR(kcr);
|
||||
// fach *= (1.f / (kcr * kcr)) * rchro * rchro;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2212,7 +2207,6 @@ std::cout << lp.sens << " " << lp.qualmet << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2543,7 +2537,7 @@ void ImProcFunctions::InverseSharp_Local (int sp, float **loctemp, const float h
|
||||
|
||||
void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const float hueplus, const float huemoins, const float hueref, const float dhue, const float chromaref, const float lumaref, const local_params & lp, LabImage * original, LabImage * transformed, int cx, int cy)
|
||||
{
|
||||
// BENCHFUN
|
||||
BENCHFUN
|
||||
const float localtype = lumaref; // always spot area
|
||||
const float ach = (float)lp.trans / 100.f;
|
||||
float reducac;
|
||||
@@ -2573,6 +2567,7 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
const float ahu = 1.f / (2.8f * lp.senssha - 280.f);
|
||||
const float bhu = 1.f - ahu * 2.8f * lp.senssha;
|
||||
|
||||
const bool detectHue = lp.senssha < 20.f && lp.qualmet == 1;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel if (multiThread)
|
||||
#endif
|
||||
@@ -2588,50 +2583,63 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < transformed->H; y++) {
|
||||
|
||||
const int loy = cy + y;
|
||||
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
|
||||
|
||||
if(isZone0) { // outside selection and outside transition zone => no effect, keep original values
|
||||
for (int x = 0; x < transformed->W; x++) {
|
||||
transformed->L[y][x] = original->L[y][x];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
int i = 0;
|
||||
|
||||
for (; i < transformed->W - 3; i += 4) {
|
||||
vfloat av = LVFU (original->a[y][i]);
|
||||
vfloat bv = LVFU (original->b[y][i]);
|
||||
STVF (atan2Buffer[i], xatan2f (bv, av));
|
||||
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
|
||||
}
|
||||
if(detectHue) {
|
||||
for (; i < transformed->W - 3; i += 4) {
|
||||
vfloat av = LVFU (original->a[y][i]);
|
||||
vfloat bv = LVFU (original->b[y][i]);
|
||||
STVF (atan2Buffer[i], xatan2f (bv, av));
|
||||
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
|
||||
}
|
||||
|
||||
for (; i < transformed->W; i++) {
|
||||
atan2Buffer[i] = xatan2f (original->b[y][i], original->a[y][i]);
|
||||
sqrtBuffer[i] = sqrt (SQR (original->b[y][i]) + SQR (original->a[y][i])) / 327.68f;
|
||||
for (; i < transformed->W; i++) {
|
||||
atan2Buffer[i] = xatan2f (original->b[y][i], original->a[y][i]);
|
||||
sqrtBuffer[i] = sqrt (SQR (original->b[y][i]) + SQR (original->a[y][i])) / 327.68f;
|
||||
}
|
||||
} else {
|
||||
for (; i < transformed->W - 3; i += 4) {
|
||||
vfloat av = LVFU (original->a[y][i]);
|
||||
vfloat bv = LVFU (original->b[y][i]);
|
||||
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
|
||||
}
|
||||
for (; i < transformed->W; i++) {
|
||||
sqrtBuffer[i] = sqrt (SQR (original->b[y][i]) + SQR (original->a[y][i])) / 327.68f;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int loy = cy + y;
|
||||
|
||||
for (int x = 0; x < transformed->W; x++) {
|
||||
int lox = cx + x;
|
||||
#ifdef __SSE2__
|
||||
float rhue = atan2Buffer[x];
|
||||
float rchro = sqrtBuffer[x];
|
||||
#else
|
||||
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
||||
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
||||
#endif
|
||||
int zone;
|
||||
int zone = 0;
|
||||
float localFactor = 1.f;
|
||||
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||
if(zone == 0) { // outside selection and outside transition zone => no effect, keep original values
|
||||
transformed->L[y][x] = original->L[y][x];
|
||||
continue;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
float rchro = sqrtBuffer[x];
|
||||
#else
|
||||
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
||||
#endif
|
||||
//prepare shape detection
|
||||
float khu = 0.f;
|
||||
float kch = 1.f;
|
||||
bool kzon = false;
|
||||
float fach = 1.f;
|
||||
float deltachro = fabs (rchro - chromaref);
|
||||
float deltahue = fabs (rhue - hueref);
|
||||
|
||||
if (deltahue > rtengine::RT_PI) {
|
||||
deltahue = - (deltahue - 2.f * rtengine::RT_PI);
|
||||
}
|
||||
|
||||
float deltaE = 20.f * deltahue + deltachro; //pseudo deltaE between 0 and 280
|
||||
|
||||
//kch to modulate action with chroma
|
||||
if (deltachro < 160.f * SQR (lp.senssha / 100.f)) {
|
||||
@@ -2641,15 +2649,24 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
float ak = 1.f / (ck - 160.f);
|
||||
float bk = -160.f * ak;
|
||||
kch = ak * deltachro + bk;
|
||||
if (lp.senssha < 40.f ) {
|
||||
kch = pow_F (kch, pa * lp.senssha + pb); //increase under 40
|
||||
}
|
||||
}
|
||||
|
||||
if (lp.senssha < 40.f ) {
|
||||
kch = pow (kch, pa * lp.senssha + pb); //increase under 40
|
||||
}
|
||||
|
||||
|
||||
// algo with detection of hue ==> artifacts for noisy images ==> denoise before
|
||||
if (lp.senssha < 20.f) { //to try...
|
||||
if (detectHue) { //to try...
|
||||
#ifdef __SSE2__
|
||||
float rhue = atan2Buffer[x];
|
||||
#else
|
||||
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
||||
#endif
|
||||
float khu = 0.f;
|
||||
float deltahue = fabs (rhue - hueref);
|
||||
|
||||
if (deltahue > rtengine::RT_PI) {
|
||||
deltahue = - (deltahue - 2.f * rtengine::RT_PI);
|
||||
}
|
||||
//hue detection
|
||||
if ((hueref + dhue) < rtengine::RT_PI && rhue < hueplus && rhue > huemoins) { //transition are good
|
||||
if (rhue >= hueplus - delhu ) {
|
||||
@@ -2661,7 +2678,6 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
}
|
||||
|
||||
|
||||
kzon = true;
|
||||
} else if ((hueref + dhue) >= rtengine::RT_PI && (rhue > huemoins || rhue < hueplus )) {
|
||||
if (rhue >= hueplus - delhu && rhue < hueplus) {
|
||||
khu = apl * rhue + bpl;
|
||||
@@ -2671,7 +2687,6 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
khu = 1.f;
|
||||
}
|
||||
|
||||
kzon = true;
|
||||
}
|
||||
|
||||
if ((hueref - dhue) > -rtengine::RT_PI && rhue < hueplus && rhue > huemoins ) {
|
||||
@@ -2683,7 +2698,6 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
khu = 1.f;
|
||||
}
|
||||
|
||||
kzon = true;
|
||||
} else if ((hueref - dhue) <= -rtengine::RT_PI && (rhue > huemoins || rhue < hueplus )) {
|
||||
if (rhue >= hueplus - delhu && rhue < hueplus) {
|
||||
khu = apl * rhue + bpl;
|
||||
@@ -2693,9 +2707,10 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
khu = 1.f;
|
||||
}
|
||||
|
||||
kzon = true;
|
||||
}
|
||||
|
||||
float deltaE = 20.f * deltahue + deltachro; //pseudo deltaE between 0 and 280
|
||||
|
||||
if (deltaE < 2.8f * lp.senssha) {
|
||||
fach = khu;
|
||||
} else {
|
||||
@@ -2709,33 +2724,12 @@ void ImProcFunctions::Sharp_Local (int call, int sp, float **loctemp, const floa
|
||||
fach *= (1.f / (kcr * kcr)) * rchro * rchro;
|
||||
}
|
||||
|
||||
if (lp.qualmet == 1) {
|
||||
} else {
|
||||
fach = 1.f;
|
||||
}
|
||||
|
||||
//fach = khu ;
|
||||
|
||||
} else {
|
||||
/*
|
||||
float kcr = 8.f;
|
||||
if(lp.senssha > 30.f){
|
||||
if (rchro < kcr) {
|
||||
fach *= (1.f / (kcr)) * rchro;
|
||||
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
int begx = int (lp.xc - lp.lxL);
|
||||
int begy = int (lp.yc - lp.lyT);
|
||||
|
||||
switch (zone) {
|
||||
case 0: { // outside selection and outside transition zone => no effect, keep original values
|
||||
transformed->L[y][x] = original->L[y][x];
|
||||
break;
|
||||
}
|
||||
|
||||
case 1: { // inside transition zone
|
||||
float factorx = localFactor;
|
||||
@@ -4831,70 +4825,36 @@ void ImProcFunctions::Lab_Local (int call, int sp, float** shbuffer, LabImage *
|
||||
|
||||
//end cbdl
|
||||
if (!lp.invshar && lp.shrad > 0.42 && call < 3 && lp.sharpena) { //interior ellipse for sharpening, call = 1 and 2 only with Dcrop and simpleprocess
|
||||
|
||||
int GW = original->W;
|
||||
int GH = original->H;
|
||||
float **bufsh;//buffer por square zone
|
||||
float **loctemp;
|
||||
float **hbuffer;
|
||||
int bfh = int (lp.ly + lp.lyT) + del; //bfw bfh real size of square zone
|
||||
int bfw = int (lp.lx + lp.lxL) + del;
|
||||
int bfh = call == 2 ? int (lp.ly + lp.lyT) + del : original->H; //bfw bfh real size of square zone
|
||||
int bfw = call == 2 ? int (lp.lx + lp.lxL) + del : original->W;
|
||||
const JaggedArray<float> loctemp (bfw, bfh);
|
||||
|
||||
if (call == 2) { //call from simpleprocess
|
||||
bufsh = new float*[bfh];
|
||||
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
bufsh[i] = new float[bfw];
|
||||
}
|
||||
const JaggedArray<float> bufsh (bfw, bfh, true);
|
||||
const JaggedArray<float> hbuffer (bfw, bfh);
|
||||
|
||||
int yStart = lp.yc - lp.lyT - cy;
|
||||
int yEnd = lp.yc + lp.ly - cy;
|
||||
int xStart = lp.xc - lp.lxL - cx;
|
||||
int xEnd = lp.xc + lp.lx - cx;
|
||||
int begy = lp.yc - lp.lyT;
|
||||
int begx = lp.xc - lp.lxL;
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#pragma omp parallel for schedule(dynamic,16)
|
||||
#endif
|
||||
|
||||
for (int ir = 0; ir < bfh; ir++) //fill with 0
|
||||
for (int jr = 0; jr < bfw; jr++) {
|
||||
bufsh[ir][jr] = 0.f;
|
||||
for (int y = yStart; y < yEnd ; y++) {
|
||||
int loy = cy + y;
|
||||
for (int x = xStart, lox = cx + x; x < xEnd; x++, lox++) {
|
||||
bufsh[loy - begy][lox - begx] = original->L[y][x];//fill square buffer with datas
|
||||
}
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int y = 0; y < transformed->H ; y++) //{
|
||||
for (int x = 0; x < transformed->W; x++) {
|
||||
int lox = cx + x;
|
||||
int loy = cy + y;
|
||||
int begx = int (lp.xc - lp.lxL);
|
||||
int begy = int (lp.yc - lp.lyT);
|
||||
|
||||
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
|
||||
bufsh[loy - begy][lox - begx] = original->L[y][x];//fill square buffer with datas
|
||||
}
|
||||
}
|
||||
|
||||
loctemp = new float*[bfh];//allocate temp
|
||||
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
loctemp[i] = new float[bfw];
|
||||
}
|
||||
|
||||
hbuffer = new float*[bfh];//allocate buffer for sharp
|
||||
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
hbuffer[i] = new float[bfw];
|
||||
}
|
||||
|
||||
//sharpen only square area instaed of all image
|
||||
ImProcFunctions::deconvsharpeningloc (bufsh, hbuffer, bfw, bfh, loctemp, params->locallab.shardamping, (double)params->locallab.sharradius / 100., params->locallab.shariter, params->locallab.sharamount);
|
||||
} else { //call from dcrop.cc
|
||||
loctemp = new float*[GH];//allocate temp
|
||||
|
||||
for (int i = 0; i < GH; i++) {
|
||||
loctemp[i] = new float[GW];
|
||||
}
|
||||
|
||||
ImProcFunctions::deconvsharpeningloc (original->L, shbuffer, GW, GH, loctemp, params->locallab.shardamping, (double)params->locallab.sharradius / 100., params->locallab.shariter, params->locallab.sharamount);
|
||||
ImProcFunctions::deconvsharpeningloc (original->L, shbuffer, bfw, bfh, loctemp, params->locallab.shardamping, (double)params->locallab.sharradius / 100., params->locallab.shariter, params->locallab.sharamount);
|
||||
|
||||
}
|
||||
|
||||
@@ -4912,50 +4872,10 @@ void ImProcFunctions::Lab_Local (int call, int sp, float** shbuffer, LabImage *
|
||||
//sharpen ellipse and transition
|
||||
Sharp_Local (call, sp, loctemp, hueplus, huemoins, hueref, dhue, chromaref, lumaref, lp, original, transformed, cx, cy);
|
||||
|
||||
//cleann all
|
||||
if (call == 2 && !lp.invshar) {
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
delete [] loctemp[i];
|
||||
}
|
||||
|
||||
delete [] loctemp;
|
||||
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
delete [] bufsh[i];
|
||||
}
|
||||
|
||||
delete [] bufsh;
|
||||
|
||||
for (int i = 0; i < bfh; i++) {
|
||||
delete [] hbuffer[i];
|
||||
}
|
||||
|
||||
delete [] hbuffer;
|
||||
} else {
|
||||
for (int i = 0; i < GH; i++) {
|
||||
delete [] loctemp[i];
|
||||
}
|
||||
|
||||
delete [] loctemp;
|
||||
|
||||
}
|
||||
|
||||
/* for (int i = 0; i < GH; i++) {
|
||||
delete [] hbuffer[i];
|
||||
}
|
||||
|
||||
delete [] hbuffer;
|
||||
*/
|
||||
|
||||
} else if (lp.invshar && lp.shrad > 0.42 && call < 3 && lp.sharpena) {
|
||||
int GW = original->W;
|
||||
int GH = original->H;
|
||||
|
||||
float **loctemp = new float*[GH];
|
||||
|
||||
for (int i = 0; i < GH; i++) {
|
||||
loctemp[i] = new float[GW];
|
||||
}
|
||||
const JaggedArray<float> loctemp (GW, GH);
|
||||
|
||||
ImProcFunctions::deconvsharpeningloc (original->L, shbuffer, GW, GH, loctemp, params->locallab.shardamping, (double)params->locallab.sharradius / 100., params->locallab.shariter, params->locallab.sharamount);
|
||||
|
||||
@@ -4971,13 +4891,6 @@ void ImProcFunctions::Lab_Local (int call, int sp, float** shbuffer, LabImage *
|
||||
}
|
||||
|
||||
InverseSharp_Local (sp, loctemp, hueplus, huemoins, hueref, dhue, chromaref, lumaref, lp, original, transformed, cx, cy);
|
||||
|
||||
for (int i = 0; i < GH; i++) {
|
||||
delete [] loctemp[i];
|
||||
}
|
||||
|
||||
delete [] loctemp;
|
||||
|
||||
}
|
||||
|
||||
// }
|
||||
|
||||
Reference in New Issue
Block a user