anothers speed-ups
This commit is contained in:
@@ -563,6 +563,13 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int y = 0; y < transformed->H; y++) {
|
for (int y = 0; y < transformed->H; y++) {
|
||||||
|
const int loy = cy + y;
|
||||||
|
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
|
||||||
|
|
||||||
|
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
@@ -580,14 +587,19 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int loy = cy + y;
|
|
||||||
|
|
||||||
for (int x = 0; x < transformed->W; x++) {
|
for (int x = 0; x < transformed->W; x++) {
|
||||||
int lox = cx + x;
|
const int lox = cx + x;
|
||||||
int begx = int (lp.xc - lp.lxL);
|
int begx = int (lp.xc - lp.lxL);
|
||||||
int begy = int (lp.yc - lp.lyT);
|
int begy = int (lp.yc - lp.lyT);
|
||||||
|
int zone = 0;
|
||||||
|
|
||||||
|
float localFactor = 1.f;
|
||||||
|
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
|
|
||||||
|
if (zone == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
|
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
float rhue = atan2Buffer[x];
|
float rhue = atan2Buffer[x];
|
||||||
@@ -596,7 +608,7 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
|
|||||||
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
||||||
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
||||||
#endif
|
#endif
|
||||||
int zone;
|
// int zone;
|
||||||
|
|
||||||
//retrieve data
|
//retrieve data
|
||||||
float cli = 1.f;
|
float cli = 1.f;
|
||||||
@@ -615,8 +627,8 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
|
|||||||
float realcligh = 1.f;
|
float realcligh = 1.f;
|
||||||
|
|
||||||
|
|
||||||
float localFactor = 1.f;
|
// float localFactor = 1.f;
|
||||||
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
// calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
//prepare shape detection
|
//prepare shape detection
|
||||||
float khu = 0.f;
|
float khu = 0.f;
|
||||||
float kch = 1.f;
|
float kch = 1.f;
|
||||||
@@ -785,7 +797,7 @@ void ImProcFunctions::cbdl_Local (int call, int sp, float ** buflight, float **l
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -826,6 +838,13 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int y = 0; y < transformed->H; y++) {
|
for (int y = 0; y < transformed->H; y++) {
|
||||||
|
const int loy = cy + y;
|
||||||
|
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
|
||||||
|
|
||||||
|
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
@@ -843,14 +862,25 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int loy = cy + y;
|
|
||||||
|
|
||||||
for (int x = 0; x < transformed->W; x++) {
|
for (int x = 0; x < transformed->W; x++) {
|
||||||
int lox = cx + x;
|
const int lox = cx + x;
|
||||||
int begx = int (lp.xc - lp.lxL);
|
const int begx = lp.xc - lp.lxL;
|
||||||
int begy = int (lp.yc - lp.lyT);
|
const int begy = lp.yc - lp.lyT;
|
||||||
|
|
||||||
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
|
float rL;
|
||||||
|
|
||||||
|
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && (rL = original->L[y][x]) > 3.2768f) {
|
||||||
|
// rL > 3.2768f to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
|
||||||
|
int zone = 0;
|
||||||
|
|
||||||
|
float localFactor = 1.f;
|
||||||
|
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
|
|
||||||
|
if (zone == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
float rhue = atan2Buffer[x];
|
float rhue = atan2Buffer[x];
|
||||||
@@ -859,7 +889,7 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
|
|||||||
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
float rhue = xatan2f (original->b[y][x], original->a[y][x]);
|
||||||
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
float rchro = sqrt (SQR (original->b[y][x]) + SQR (original->a[y][x])) / 327.68f;
|
||||||
#endif
|
#endif
|
||||||
int zone;
|
// int zone;
|
||||||
|
|
||||||
//retrieve data
|
//retrieve data
|
||||||
float cli = 1.f;
|
float cli = 1.f;
|
||||||
@@ -878,8 +908,8 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
|
|||||||
float realcligh = 1.f;
|
float realcligh = 1.f;
|
||||||
|
|
||||||
|
|
||||||
float localFactor = 1.f;
|
// float localFactor = 1.f;
|
||||||
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
// calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
//prepare shape detection
|
//prepare shape detection
|
||||||
float khu = 0.f;
|
float khu = 0.f;
|
||||||
float kch = 1.f;
|
float kch = 1.f;
|
||||||
@@ -1057,6 +1087,7 @@ void ImProcFunctions::TM_Local (int call, int sp, LabImage * tmp1, float **bufli
|
|||||||
transformed->b[y][x] = original->b[y][x] + difb * kch * fach;//same as Luma
|
transformed->b[y][x] = original->b[y][x] + difb * kch * fach;//same as Luma
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2806,17 +2837,21 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
|
|||||||
|
|
||||||
for (int y = 0; y < transformed->H; y++)
|
for (int y = 0; y < transformed->H; y++)
|
||||||
{
|
{
|
||||||
|
const int loy = cy + y;
|
||||||
|
const bool isZone0 = loy > lp.yc + lp.ly || loy < lp.yc - lp.lyT; // whole line is zone 0 => we can skip a lot of processing
|
||||||
|
|
||||||
|
if (isZone0) { // outside selection and outside transition zone => no effect, keep original values
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
int i = 0;
|
int i = 0;
|
||||||
//Todo optimization in this first part with bufcolorig and bufcoltra
|
|
||||||
|
|
||||||
for (; i < transformed->W - 3; i += 4) {
|
for (; i < transformed->W - 3; i += 4) {
|
||||||
vfloat av = LVFU (original->a[y][i]);
|
vfloat av = LVFU (original->a[y][i]);
|
||||||
vfloat bv = LVFU (original->b[y][i]);
|
vfloat bv = LVFU (original->b[y][i]);
|
||||||
STVF (atan2Buffer[i], xatan2f (bv, av));
|
STVF (atan2Buffer[i], xatan2f (bv, av));
|
||||||
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
|
STVF (sqrtBuffer[i], _mm_sqrt_ps (SQRV (bv) + SQRV (av)) / c327d68v);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; i < transformed->W; i++) {
|
for (; i < transformed->W; i++) {
|
||||||
@@ -2826,14 +2861,20 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int loy = cy + y;
|
|
||||||
|
|
||||||
for (int x = 0; x < transformed->W; x++) {
|
for (int x = 0; x < transformed->W; x++) {
|
||||||
int lox = cx + x;
|
const int lox = cx + x;
|
||||||
int begx = int (lp.xc - lp.lxL);
|
const int begx = int (lp.xc - lp.lxL);
|
||||||
int begy = int (lp.yc - lp.lyT);
|
const int begy = int (lp.yc - lp.lyT);
|
||||||
|
|
||||||
if (lox >= (lp.xc - lp.lxL) && lox < (lp.xc + lp.lx) && loy >= (lp.yc - lp.lyT) && loy < (lp.yc + lp.ly)) {
|
int zone = 0;
|
||||||
|
|
||||||
|
float localFactor = 1.f;
|
||||||
|
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
|
|
||||||
|
if (zone == 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
@@ -3177,9 +3218,9 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int zone;
|
// int zone;
|
||||||
float localFactor;
|
// float localFactor;
|
||||||
calcTransition (lox, loy, ach, lp, zone, localFactor);
|
// calcTransition (lox, loy, ach, lp, zone, localFactor);
|
||||||
float th_r = 0.01f;
|
float th_r = 0.01f;
|
||||||
|
|
||||||
if (rL > th_r) { //to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
|
if (rL > th_r) { //to avoid crash with very low gamut in rare cases ex : L=0.01 a=0.5 b=-0.9
|
||||||
@@ -3360,7 +3401,8 @@ void ImProcFunctions::ColorLight_Local (int call, LabImage * bufcolorig, float *
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user