Small speedup for rl sharpening when damping = 0, no issue

This commit is contained in:
heckflosse 2018-06-12 17:59:10 +02:00
parent 168309f45b
commit a3055f3552
2 changed files with 20 additions and 30 deletions

View File

@ -163,15 +163,15 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
#pragma omp single nowait
#endif
{
dst[0][0] = divBuffer[0][0] / (src[0][0] > 0.f ? src[0][0] : 1.f);
dst[0][0] = rtengine::max(divBuffer[0][0] / (src[0][0] > 0.f ? src[0][0] : 1.f), 0.f);
for (int j = 1; j < W - 1; j++)
{
float tmp = (b1 * (src[0][j - 1] + src[0][j + 1]) + b0 * src[0][j]);
dst[0][j] = divBuffer[0][j] / (tmp > 0.f ? tmp : 1.f);
dst[0][j] = rtengine::max(divBuffer[0][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
}
dst[0][W - 1] = divBuffer[0][W - 1] / (src[0][W - 1] > 0.f ? src[0][W - 1] : 1.f);
dst[0][W - 1] = rtengine::max(divBuffer[0][W - 1] / (src[0][W - 1] > 0.f ? src[0][W - 1] : 1.f), 0.f);
}
#ifdef _OPENMP
@ -180,15 +180,15 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
for (int i = 1; i < H - 1; i++) {
float tmp = (b1 * (src[i - 1][0] + src[i + 1][0]) + b0 * src[i][0]);
dst[i][0] = divBuffer[i][0] / (tmp > 0.f ? tmp : 1.f);
dst[i][0] = rtengine::max(divBuffer[i][0] / (tmp > 0.f ? tmp : 1.f), 0.f);
for (int j = 1; j < W - 1; j++) {
tmp = (c2 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + c1 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + c0 * src[i][j]);
dst[i][j] = divBuffer[i][j] / (tmp > 0.f ? tmp : 1.f);
dst[i][j] = rtengine::max(divBuffer[i][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
}
tmp = (b1 * (src[i - 1][W - 1] + src[i + 1][W - 1]) + b0 * src[i][W - 1]);
dst[i][W - 1] = divBuffer[i][W - 1] / (tmp > 0.f ? tmp : 1.f);
dst[i][W - 1] = rtengine::max(divBuffer[i][W - 1] / (tmp > 0.f ? tmp : 1.f), 0.f);
}
// last row
@ -196,14 +196,14 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
#pragma omp single
#endif
{
dst[H - 1][0] = divBuffer[H - 1][0] / (src[H - 1][0] > 0.f ? src[H - 1][0] : 1.f);
dst[H - 1][0] = rtengine::max(divBuffer[H - 1][0] / (src[H - 1][0] > 0.f ? src[H - 1][0] : 1.f), 0.f);
for (int j = 1; j < W - 1; j++) {
float tmp = (b1 * (src[H - 1][j - 1] + src[H - 1][j + 1]) + b0 * src[H - 1][j]);
dst[H - 1][j] = divBuffer[H - 1][j] / (tmp > 0.f ? tmp : 1.f);
dst[H - 1][j] = rtengine::max(divBuffer[H - 1][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
}
dst[H - 1][W - 1] = divBuffer[H - 1][W - 1] / (src[H - 1][W - 1] > 0.f ? src[H - 1][W - 1] : 1.f);
dst[H - 1][W - 1] = rtengine::max(divBuffer[H - 1][W - 1] / (src[H - 1][W - 1] > 0.f ? src[H - 1][W - 1] : 1.f), 0.f);
}
}
@ -859,8 +859,8 @@ template<class T> void gaussVerticalSsediv (T** src, T** dst, T** divBuffer, con
Tv1 = Rv1;
Rv = LVF(tmp[j][0]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
Rv1 = LVF(tmp[j][4]) * Bv + Tv1 * b1v + Tm2v1 * b2v + Tm3v1 * b3v;
STVFU( dst[j][i], LVFU(divBuffer[j][i]) / vself(vmaskf_gt(Rv, ZEROV), Rv, onev));
STVFU( dst[j][i + 4], LVFU(divBuffer[j][i + 4]) / vself(vmaskf_gt(Rv1, ZEROV), Rv1, onev));
STVFU( dst[j][i], vmaxf(LVFU(divBuffer[j][i]) / vself(vmaskf_gt(Rv, ZEROV), Rv, onev), ZEROV));
STVFU( dst[j][i + 4], vmaxf(LVFU(divBuffer[j][i + 4]) / vself(vmaskf_gt(Rv1, ZEROV), Rv1, onev), ZEROV));
Tm3v = Tm2v;
Tm3v1 = Tm2v1;
Tm2v = Tv;
@ -895,7 +895,7 @@ template<class T> void gaussVerticalSsediv (T** src, T** dst, T** divBuffer, con
}
for (int j = 0; j < H; j++) {
dst[j][i] = divBuffer[j][i] / (tmp[j][0] > 0.f ? tmp[j][0] : 1.f);
dst[j][i] = rtengine::max(divBuffer[j][i] / (tmp[j][0] > 0.f ? tmp[j][0] : 1.f), 0.f);
}
}
@ -1020,14 +1020,14 @@ template<class T> void gaussVerticaldiv (T** src, T** dst, T** divBuffer, const
}
for (int k = 0; k < numcols; k++) {
dst[H - 1][i + k] = divBuffer[H - 1][i + k] / (temp2[H - 1][k] = temp2Hm1[k]);
dst[H - 2][i + k] = divBuffer[H - 2][i + k] / (temp2[H - 2][k] = B * temp2[H - 2][k] + b1 * temp2[H - 1][k] + b2 * temp2H[k] + b3 * temp2Hp1[k]);
dst[H - 3][i + k] = divBuffer[H - 3][i + k] / (temp2[H - 3][k] = B * temp2[H - 3][k] + b1 * temp2[H - 2][k] + b2 * temp2[H - 1][k] + b3 * temp2H[k]);
rtengine::max(dst[H - 1][i + k] = divBuffer[H - 1][i + k] / (temp2[H - 1][k] = temp2Hm1[k]), 0.f);
rtengine::max(dst[H - 2][i + k] = divBuffer[H - 2][i + k] / (temp2[H - 2][k] = B * temp2[H - 2][k] + b1 * temp2[H - 1][k] + b2 * temp2H[k] + b3 * temp2Hp1[k]), 0.f);
rtengine::max(dst[H - 3][i + k] = divBuffer[H - 3][i + k] / (temp2[H - 3][k] = B * temp2[H - 3][k] + b1 * temp2[H - 2][k] + b2 * temp2[H - 1][k] + b3 * temp2H[k], 0.f);
}
for (int j = H - 4; j >= 0; j--) {
for (int k = 0; k < numcols; k++) {
dst[j][i + k] = divBuffer[j][i + k] / (temp2[j][k] = B * temp2[j][k] + b1 * temp2[j + 1][k] + b2 * temp2[j + 2][k] + b3 * temp2[j + 3][k]);
rtengine::max(dst[j][i + k] = divBuffer[j][i + k] / (temp2[j][k] = B * temp2[j][k] + b1 * temp2[j + 1][k] + b2 * temp2[j + 2][k] + b3 * temp2[j + 3][k]), 0.f);
}
}
}
@ -1050,12 +1050,12 @@ template<class T> void gaussVerticaldiv (T** src, T** dst, T** divBuffer, const
double temp2H = src[H - 1][i] + M[1][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[1][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[1][2] * (temp2[H - 3][0] - src[H - 1][i]);
double temp2Hp1 = src[H - 1][i] + M[2][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[2][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[2][2] * (temp2[H - 3][0] - src[H - 1][i]);
dst[H - 1][i] = divBuffer[H - 1][i] / (temp2[H - 1][0] = temp2Hm1);
dst[H - 2][i] = divBuffer[H - 2][i] / (temp2[H - 2][0] = B * temp2[H - 2][0] + b1 * temp2[H - 1][0] + b2 * temp2H + b3 * temp2Hp1);
dst[H - 3][i] = divBuffer[H - 3][i] / (temp2[H - 3][0] = B * temp2[H - 3][0] + b1 * temp2[H - 2][0] + b2 * temp2[H - 1][0] + b3 * temp2H);
rtengine::max(dst[H - 1][i] = divBuffer[H - 1][i] / (temp2[H - 1][0] = temp2Hm1), 0.f);
rtengine::max(dst[H - 2][i] = divBuffer[H - 2][i] / (temp2[H - 2][0] = B * temp2[H - 2][0] + b1 * temp2[H - 1][0] + b2 * temp2H + b3 * temp2Hp1), 0.f);
rtengine::max(dst[H - 3][i] = divBuffer[H - 3][i] / (temp2[H - 3][0] = B * temp2[H - 3][0] + b1 * temp2[H - 2][0] + b2 * temp2[H - 1][0] + b3 * temp2H), 0.f);
for (int j = H - 4; j >= 0; j--) {
dst[j][i] = divBuffer[j][i] / (temp2[j][0] = B * temp2[j][0] + b1 * temp2[j + 1][0] + b2 * temp2[j + 2][0] + b3 * temp2[j + 3][0]);
rtengine::max(dst[j][i] = divBuffer[j][i] / (temp2[j][0] = B * temp2[j][0] + b1 * temp2[j + 1][0] + b2 * temp2[j + 2][0] + b3 * temp2[j + 3][0]), 0.f);
}
}
}

View File

@ -190,22 +190,12 @@ BENCHFUN
if (!needdamp) {
// apply gaussian blur and divide luminance by result of gaussian blur
gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance);
#ifdef _OPENMP
#pragma omp for
#endif
for (int i = 0; i < H; ++i) {
for(int j = 0; j < W; ++j) {
tmp[i][j] = max(tmp[i][j], 0.f);
}
}
} else {
// apply gaussian blur + damping
gaussianBlur(tmpI, tmp, W, H, sigma);
dcdamping(tmp, luminance, damping, W, H);
}
gaussianBlur(tmp, tmpI, W, H, sigma, nullptr, GAUSS_MULT);
} // end for
#ifdef _OPENMP