Small speedup for RL sharpening when damping = 0 (no associated issue)
This commit is contained in:
parent
168309f45b
commit
a3055f3552
@ -163,15 +163,15 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
|
||||
#pragma omp single nowait
|
||||
#endif
|
||||
{
|
||||
dst[0][0] = divBuffer[0][0] / (src[0][0] > 0.f ? src[0][0] : 1.f);
|
||||
dst[0][0] = rtengine::max(divBuffer[0][0] / (src[0][0] > 0.f ? src[0][0] : 1.f), 0.f);
|
||||
|
||||
for (int j = 1; j < W - 1; j++)
|
||||
{
|
||||
float tmp = (b1 * (src[0][j - 1] + src[0][j + 1]) + b0 * src[0][j]);
|
||||
dst[0][j] = divBuffer[0][j] / (tmp > 0.f ? tmp : 1.f);
|
||||
dst[0][j] = rtengine::max(divBuffer[0][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
|
||||
}
|
||||
|
||||
dst[0][W - 1] = divBuffer[0][W - 1] / (src[0][W - 1] > 0.f ? src[0][W - 1] : 1.f);
|
||||
dst[0][W - 1] = rtengine::max(divBuffer[0][W - 1] / (src[0][W - 1] > 0.f ? src[0][W - 1] : 1.f), 0.f);
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
@ -180,15 +180,15 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
|
||||
|
||||
for (int i = 1; i < H - 1; i++) {
|
||||
float tmp = (b1 * (src[i - 1][0] + src[i + 1][0]) + b0 * src[i][0]);
|
||||
dst[i][0] = divBuffer[i][0] / (tmp > 0.f ? tmp : 1.f);
|
||||
dst[i][0] = rtengine::max(divBuffer[i][0] / (tmp > 0.f ? tmp : 1.f), 0.f);
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
tmp = (c2 * (src[i - 1][j - 1] + src[i - 1][j + 1] + src[i + 1][j - 1] + src[i + 1][j + 1]) + c1 * (src[i - 1][j] + src[i][j - 1] + src[i][j + 1] + src[i + 1][j]) + c0 * src[i][j]);
|
||||
dst[i][j] = divBuffer[i][j] / (tmp > 0.f ? tmp : 1.f);
|
||||
dst[i][j] = rtengine::max(divBuffer[i][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
|
||||
}
|
||||
|
||||
tmp = (b1 * (src[i - 1][W - 1] + src[i + 1][W - 1]) + b0 * src[i][W - 1]);
|
||||
dst[i][W - 1] = divBuffer[i][W - 1] / (tmp > 0.f ? tmp : 1.f);
|
||||
dst[i][W - 1] = rtengine::max(divBuffer[i][W - 1] / (tmp > 0.f ? tmp : 1.f), 0.f);
|
||||
}
|
||||
|
||||
// last row
|
||||
@ -196,14 +196,14 @@ template<class T> void gauss3x3div (T** RESTRICT src, T** RESTRICT dst, T** REST
|
||||
#pragma omp single
|
||||
#endif
|
||||
{
|
||||
dst[H - 1][0] = divBuffer[H - 1][0] / (src[H - 1][0] > 0.f ? src[H - 1][0] : 1.f);
|
||||
dst[H - 1][0] = rtengine::max(divBuffer[H - 1][0] / (src[H - 1][0] > 0.f ? src[H - 1][0] : 1.f), 0.f);
|
||||
|
||||
for (int j = 1; j < W - 1; j++) {
|
||||
float tmp = (b1 * (src[H - 1][j - 1] + src[H - 1][j + 1]) + b0 * src[H - 1][j]);
|
||||
dst[H - 1][j] = divBuffer[H - 1][j] / (tmp > 0.f ? tmp : 1.f);
|
||||
dst[H - 1][j] = rtengine::max(divBuffer[H - 1][j] / (tmp > 0.f ? tmp : 1.f), 0.f);
|
||||
}
|
||||
|
||||
dst[H - 1][W - 1] = divBuffer[H - 1][W - 1] / (src[H - 1][W - 1] > 0.f ? src[H - 1][W - 1] : 1.f);
|
||||
dst[H - 1][W - 1] = rtengine::max(divBuffer[H - 1][W - 1] / (src[H - 1][W - 1] > 0.f ? src[H - 1][W - 1] : 1.f), 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -859,8 +859,8 @@ template<class T> void gaussVerticalSsediv (T** src, T** dst, T** divBuffer, con
|
||||
Tv1 = Rv1;
|
||||
Rv = LVF(tmp[j][0]) * Bv + Tv * b1v + Tm2v * b2v + Tm3v * b3v;
|
||||
Rv1 = LVF(tmp[j][4]) * Bv + Tv1 * b1v + Tm2v1 * b2v + Tm3v1 * b3v;
|
||||
STVFU( dst[j][i], LVFU(divBuffer[j][i]) / vself(vmaskf_gt(Rv, ZEROV), Rv, onev));
|
||||
STVFU( dst[j][i + 4], LVFU(divBuffer[j][i + 4]) / vself(vmaskf_gt(Rv1, ZEROV), Rv1, onev));
|
||||
STVFU( dst[j][i], vmaxf(LVFU(divBuffer[j][i]) / vself(vmaskf_gt(Rv, ZEROV), Rv, onev), ZEROV));
|
||||
STVFU( dst[j][i + 4], vmaxf(LVFU(divBuffer[j][i + 4]) / vself(vmaskf_gt(Rv1, ZEROV), Rv1, onev), ZEROV));
|
||||
Tm3v = Tm2v;
|
||||
Tm3v1 = Tm2v1;
|
||||
Tm2v = Tv;
|
||||
@ -895,7 +895,7 @@ template<class T> void gaussVerticalSsediv (T** src, T** dst, T** divBuffer, con
|
||||
}
|
||||
|
||||
for (int j = 0; j < H; j++) {
|
||||
dst[j][i] = divBuffer[j][i] / (tmp[j][0] > 0.f ? tmp[j][0] : 1.f);
|
||||
dst[j][i] = rtengine::max(divBuffer[j][i] / (tmp[j][0] > 0.f ? tmp[j][0] : 1.f), 0.f);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1020,14 +1020,14 @@ template<class T> void gaussVerticaldiv (T** src, T** dst, T** divBuffer, const
|
||||
}
|
||||
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
dst[H - 1][i + k] = divBuffer[H - 1][i + k] / (temp2[H - 1][k] = temp2Hm1[k]);
|
||||
dst[H - 2][i + k] = divBuffer[H - 2][i + k] / (temp2[H - 2][k] = B * temp2[H - 2][k] + b1 * temp2[H - 1][k] + b2 * temp2H[k] + b3 * temp2Hp1[k]);
|
||||
dst[H - 3][i + k] = divBuffer[H - 3][i + k] / (temp2[H - 3][k] = B * temp2[H - 3][k] + b1 * temp2[H - 2][k] + b2 * temp2[H - 1][k] + b3 * temp2H[k]);
|
||||
rtengine::max(dst[H - 1][i + k] = divBuffer[H - 1][i + k] / (temp2[H - 1][k] = temp2Hm1[k]), 0.f);
|
||||
rtengine::max(dst[H - 2][i + k] = divBuffer[H - 2][i + k] / (temp2[H - 2][k] = B * temp2[H - 2][k] + b1 * temp2[H - 1][k] + b2 * temp2H[k] + b3 * temp2Hp1[k]), 0.f);
|
||||
rtengine::max(dst[H - 3][i + k] = divBuffer[H - 3][i + k] / (temp2[H - 3][k] = B * temp2[H - 3][k] + b1 * temp2[H - 2][k] + b2 * temp2[H - 1][k] + b3 * temp2H[k], 0.f);
|
||||
}
|
||||
|
||||
for (int j = H - 4; j >= 0; j--) {
|
||||
for (int k = 0; k < numcols; k++) {
|
||||
dst[j][i + k] = divBuffer[j][i + k] / (temp2[j][k] = B * temp2[j][k] + b1 * temp2[j + 1][k] + b2 * temp2[j + 2][k] + b3 * temp2[j + 3][k]);
|
||||
rtengine::max(dst[j][i + k] = divBuffer[j][i + k] / (temp2[j][k] = B * temp2[j][k] + b1 * temp2[j + 1][k] + b2 * temp2[j + 2][k] + b3 * temp2[j + 3][k]), 0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1050,12 +1050,12 @@ template<class T> void gaussVerticaldiv (T** src, T** dst, T** divBuffer, const
|
||||
double temp2H = src[H - 1][i] + M[1][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[1][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[1][2] * (temp2[H - 3][0] - src[H - 1][i]);
|
||||
double temp2Hp1 = src[H - 1][i] + M[2][0] * (temp2[H - 1][0] - src[H - 1][i]) + M[2][1] * (temp2[H - 2][0] - src[H - 1][i]) + M[2][2] * (temp2[H - 3][0] - src[H - 1][i]);
|
||||
|
||||
dst[H - 1][i] = divBuffer[H - 1][i] / (temp2[H - 1][0] = temp2Hm1);
|
||||
dst[H - 2][i] = divBuffer[H - 2][i] / (temp2[H - 2][0] = B * temp2[H - 2][0] + b1 * temp2[H - 1][0] + b2 * temp2H + b3 * temp2Hp1);
|
||||
dst[H - 3][i] = divBuffer[H - 3][i] / (temp2[H - 3][0] = B * temp2[H - 3][0] + b1 * temp2[H - 2][0] + b2 * temp2[H - 1][0] + b3 * temp2H);
|
||||
rtengine::max(dst[H - 1][i] = divBuffer[H - 1][i] / (temp2[H - 1][0] = temp2Hm1), 0.f);
|
||||
rtengine::max(dst[H - 2][i] = divBuffer[H - 2][i] / (temp2[H - 2][0] = B * temp2[H - 2][0] + b1 * temp2[H - 1][0] + b2 * temp2H + b3 * temp2Hp1), 0.f);
|
||||
rtengine::max(dst[H - 3][i] = divBuffer[H - 3][i] / (temp2[H - 3][0] = B * temp2[H - 3][0] + b1 * temp2[H - 2][0] + b2 * temp2[H - 1][0] + b3 * temp2H), 0.f);
|
||||
|
||||
for (int j = H - 4; j >= 0; j--) {
|
||||
dst[j][i] = divBuffer[j][i] / (temp2[j][0] = B * temp2[j][0] + b1 * temp2[j + 1][0] + b2 * temp2[j + 2][0] + b3 * temp2[j + 3][0]);
|
||||
rtengine::max(dst[j][i] = divBuffer[j][i] / (temp2[j][0] = B * temp2[j][0] + b1 * temp2[j + 1][0] + b2 * temp2[j + 2][0] + b3 * temp2[j + 3][0]), 0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -190,22 +190,12 @@ BENCHFUN
|
||||
if (!needdamp) {
|
||||
// apply gaussian blur and divide luminance by result of gaussian blur
|
||||
gaussianBlur(tmpI, tmp, W, H, sigma, nullptr, GAUSS_DIV, luminance);
|
||||
#ifdef _OPENMP
|
||||
#pragma omp for
|
||||
#endif
|
||||
for (int i = 0; i < H; ++i) {
|
||||
for(int j = 0; j < W; ++j) {
|
||||
tmp[i][j] = max(tmp[i][j], 0.f);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// apply gaussian blur + damping
|
||||
gaussianBlur(tmpI, tmp, W, H, sigma);
|
||||
dcdamping(tmp, luminance, damping, W, H);
|
||||
}
|
||||
|
||||
gaussianBlur(tmp, tmpI, W, H, sigma, nullptr, GAUSS_MULT);
|
||||
|
||||
} // end for
|
||||
|
||||
#ifdef _OPENMP
|
||||
|
Loading…
x
Reference in New Issue
Block a user