Another round of speed-ups

This commit is contained in:
Desmis
2017-04-18 16:12:43 +02:00
parent 46f7f20822
commit 9feecf4f1d

View File

@@ -563,6 +563,13 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
#endif
for (int y = 0; y < transformed->H; y++) {
const int loy = cy + y;
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
continue;
}
#ifdef __SSE2__
int i = 0;
@@ -580,14 +587,19 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
#endif
int loy = cy + y;
for (int x = 0; x < transformed->W; x++) {
int lox = cx + x;
const int lox = cx + x;
int begx = int (lp.xc - lp.lxL);
int begy = int (lp.yc - lp.lyT);
int zone = 0;
float localFactor = 1.f;
calcTransition (lox, loy, ach, lp, zone, localFactor);
if (zone == 0) {
continue;
}
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
#ifdef __SSE2__
float rhue = atan2Buffer[x];
@@ -596,7 +608,7 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
#endif
int zone;
// int zone;
//retrieve data
float cli = 1.f;
@@ -615,8 +627,8 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
float realcligh = 1.f;
float localFactor = 1.f;
calcTransition (lox, loy, ach, lp, zone, localFactor);
// float localFactor = 1.f;
// calcTransition (lox, loy, ach, lp, zone, localFactor);
//prepare shape detection
float khu = 0.f;
float kch = 1.f;
@@ -785,7 +797,7 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
}
}
}
// }
}
}
}
@@ -826,6 +838,13 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
#endif
for (int y = 0; y < transformed->H; y++) {
const int loy = cy + y;
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
continue;
}
#ifdef __SSE2__
int i = 0;
@@ -843,14 +862,25 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
#endif
int loy = cy + y;
for (int x = 0; x < transformed->W; x++) {
int lox = cx + x;
int begx = int (lp.xc - lp.lxL);
int begy = int (lp.yc - lp.lyT);
const int lox = cx + x;
const int begx = lp.xc - lp.lxL;
const int begy = lp.yc - lp.lyT;
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
float rL;
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && (rL = original->L[y][x]) > 3.2768f) {
// rL > 3.2768f to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
int zone = 0;
float localFactor = 1.f;
calcTransition (lox, loy, ach, lp, zone, localFactor);
if (zone == 0) {
continue;
}
// if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
#ifdef __SSE2__
float rhue = atan2Buffer[x];
@@ -859,7 +889,7 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
#endif
int zone;
// int zone;
//retrieve data
float cli = 1.f;
@@ -878,8 +908,8 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
float realcligh = 1.f;
float localFactor = 1.f;
calcTransition (lox, loy, ach, lp, zone, localFactor);
// float localFactor = 1.f;
// calcTransition (lox, loy, ach, lp, zone, localFactor);
//prepare shape detection
float khu = 0.f;
float kch = 1.f;
@@ -1057,6 +1087,7 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
transformed->b[y][x] = original->b[y][x] + difb * kch * fach;//same as Luma
}
}
}
}
}
@@ -2806,17 +2837,21 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
for (int y = 0; y < transformed->H; y++)
{
const int loy = cy + y;
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
continue;
}
#ifdef __SSE2__
int i = 0;
//Todo optimization in this first part with bufcolorig and bufcoltra
for (; i < transformed->W - 3; i += 4) {
vfloat av = LVFU (original->a[y][i]);
vfloat bv = LVFU (original->b[y][i]);
STVF (atan2Buffer[i], xatan2f (bv, av));
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
}
for (; i < transformed->W; i++) {
@@ -2826,14 +2861,20 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
#endif
int loy = cy + y;
for (int x = 0; x < transformed->W; x++) {
int lox = cx + x;
int begx = int (lp.xc - lp.lxL);
int begy = int (lp.yc - lp.lyT);
const int lox = cx + x;
const int begx = int (lp.xc - lp.lxL);
const int begy = int (lp.yc - lp.lyT);
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
int zone = 0;
float localFactor = 1.f;
calcTransition (lox, loy, ach, lp, zone, localFactor);
if (zone == 0) {
continue;
}
#ifdef __SSE2__
@@ -3177,9 +3218,9 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
}
int zone;
float localFactor;
calcTransition (lox, loy, ach, lp, zone, localFactor);
// int zone;
// float localFactor;
// calcTransition (lox, loy, ach, lp, zone, localFactor);
float th_r = 0.01f;
if (rL > th_r) { //to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
@@ -3360,7 +3401,8 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
}
}
}
// }
}
}
}