Badpixelscam(): Bugfix, simplified interface and SSE code

This commit is contained in:
heckflosse
2018-02-19 17:02:17 +01:00
parent e527ad44cd
commit ad37d6baa8
3 changed files with 153 additions and 116 deletions

View File

@@ -485,7 +485,7 @@ void ImProcFunctions::PF_correct_RTcam(CieImage * src, double radius, int thresh
delete [] fringe;
}
void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float skinprot, float chrom, int hotbad)
void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, int mode, float chrom, bool hotbad)
{
BENCHFUN
const int halfwin = ceil(2 * radius) + 1;
@@ -526,7 +526,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
int j = 0;
for (; j < 2; j++) {
float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
for (int j1 = 0; j1 <= j + 2; j1++ ) {
@@ -552,7 +552,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
#endif
for (; j < width - 2; j++) {
float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
for (int j1 = j - 2; j1 <= j + 2; j1++ ) {
@@ -564,7 +564,7 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
for (; j < width; j++) {
float shfabs = fabs(src->sh_p[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
for (int j1 = j - 2; j1 < width; j1++ ) {
@@ -588,9 +588,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
continue;
}
float norm = 0.0f;
float shsum = 0.0f;
float sum = 0.0f;
float norm = 0.f;
float shsum = 0.f;
float sum = 0.f;
int tot = 0;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
@@ -622,9 +622,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
continue;
}
float norm = 0.0f;
float shsum = 0.0f;
float sum = 0.0f;
float norm = 0.f;
float shsum = 0.f;
float sum = 0.f;
int tot = 0;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
@@ -656,9 +656,9 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
continue;
}
float norm = 0.0f;
float shsum = 0.0f;
float sum = 0.0f;
float norm = 0.f;
float shsum = 0.f;
float sum = 0.f;
int tot = 0;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++ )
@@ -811,138 +811,175 @@ void ImProcFunctions::Badpixelscam(CieImage * src, double radius, int thresh, in
}
// begin chroma badpixels
double chrommed = 0.0; // use double precision for large summations
if(hotbad) {
double chrommed = 0.0; // use double precision for large summations
#ifdef _OPENMP
#pragma omp parallel for reduction(+:chrommed)
#pragma omp parallel for reduction(+:chrommed)
#endif
for(int i = 0; i < height; i++ ) {
for(int j = 0; j < width; j++) {
float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]);
chrommed += chroma;
badpix[i * width + j] = chroma;
for(int i = 0; i < height; i++ ) {
for(int j = 0; j < width; j++) {
float chroma = SQR(sraa[i][j] - tmaa[i][j]) + SQR(srbb[i][j] - tmbb[i][j]);
chrommed += chroma;
badpix[i * width + j] = chroma;
}
}
}
chrommed /= (height * width);
chrommed /= (height * width);
if(chrommed > 0.0) {
// now chrommed is calculated, so we postprocess badpix to reduce the number of divisions in future
if(chrommed > 0.0) {
// now as chrommed is calculated, we postprocess badpix to reduce the number of divisions in future
#ifdef _OPENMP
#pragma omp parallel
#pragma omp parallel
#endif
{
{
#ifdef __SSE2__
vfloat chrommedv = F2V(chrommed);
vfloat onev = F2V(1.f);
vfloat chrommedv = F2V(chrommed);
vfloat onev = F2V(1.f);
#endif
#ifdef _OPENMP
#pragma omp for
#endif
for(int i = 0; i < height; i++) {
int j = 0;
for(int i = 0; i < height; i++) {
int j = 0;
#ifdef __SSE2__
for(; j < width - 3; j += 4) {
STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv));
}
for(; j < width - 3; j += 4) {
STVFU(badpix[i * width + j], onev / (LVFU(badpix[i * width + j]) + chrommedv));
}
#endif
for(; j < width; j++) {
badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed);
for(; j < width; j++) {
badpix[i * width + j] = 1.f / (badpix[i * width + j] + chrommed);
}
}
}
}
const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed);
const float threshfactor = 1.f / ((thresh * chrommed) / 33.f + chrommed);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,16)
#pragma omp parallel for schedule(dynamic,16)
#endif
for(int i = 0; i < height; i++ ) {
int j = 0;
for(; j < halfwin; j++) {
for(int i = 0; i < height; i++ ) {
int j = 0;
for(; j < halfwin; j++) {
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = 0; j1 < j + halfwin; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
}
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = 0; j1 < j + halfwin; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
}
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = atot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = btot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
if(CC < chrom) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
}
}
}
}
}
for(; j < width - halfwin; j++) {
#ifdef __SSE2__
vfloat threshfactorv = F2V(threshfactor);
vfloat chromv = F2V(chrom);
vfloat piBy180v = F2V(RT_PI_F_180);
for(; j < width - halfwin - 3; j+=4) {
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
vmask selMask = vmaskf_lt(LVFU(badpix[i * width + j]), threshfactorv);
if(_mm_movemask_ps((vfloat)selMask)) {
vfloat atotv = ZEROV;
vfloat btotv = ZEROV;
vfloat normv = ZEROV;
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
}
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
vfloat wtv = LVFU(badpix[i1 * width + j1]);
atotv += wtv * LVFU(sraa[i1][j1]);
btotv += wtv * LVFU(srbb[i1][j1]);
normv += wtv;
}
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = atot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
selMask = vandm(selMask, vmaskf_gt(normv, ZEROV));
if(_mm_movemask_ps((vfloat)selMask)) {
vfloat interav = atotv / normv;
vfloat interbv = btotv / normv;
vfloat CCv = vsqrtf(SQRV(interbv) + SQRV(interav));
if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
selMask = vandm(selMask, vmaskf_lt(CCv, chromv));
if(_mm_movemask_ps((vfloat)selMask)) {
STVFU(src->h_p[i][j], xatan2f(interbv, interav) / piBy180v);
STVFU(src->C_p[i][j], CCv);
}
}
}
}
}
#endif
for(; j < width - halfwin; j++) {
for(; j < width; j++) {
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = j - halfwin + 1; j1 < j + halfwin; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
}
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = j - halfwin + 1; j1 < width; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = btot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
if(CC < chrom) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
}
}
}
}
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = atot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
for(; j < width; j++) {
if(hotbad != 0 || (CC < chrom && skinprot != 0.f)) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
if (badpix[i * width + j] < threshfactor) {
float atot = 0.f;
float btot = 0.f;
float norm = 0.f;
float wt;
for (int i1 = max(0, i - halfwin + 1); i1 < min(height, i + halfwin); i1++)
for (int j1 = j - halfwin + 1; j1 < width; j1++) {
wt = badpix[i1 * width + j1];
atot += wt * sraa[i1][j1];
btot += wt * srbb[i1][j1];
norm += wt;
}
if(norm > 0.f) {
const float intera = atot / norm;
const float interb = btot / norm;
const float CC = sqrt(SQR(interb) + SQR(intera));
if(CC < chrom) {
src->h_p[i][j] = xatan2f(interb, intera) / RT_PI_F_180;
src->C_p[i][j] = CC;
}
}
}
}
@@ -994,7 +1031,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl
int j = 0;
for (; j < 2; j++) {
float shfabs = fabs(src->L[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) {
for (int j1 = 0; j1 <= j + 2; j1++) {
@@ -1020,7 +1057,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl
#endif
for (; j < width - 2; j++) {
float shfabs = fabs(src->L[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) {
for (int j1 = j - 2; j1 <= j + 2; j1++) {
@@ -1032,7 +1069,7 @@ void ImProcFunctions::BadpixelsLab(LabImage * src, double radius, int thresh, fl
for (; j < width; j++) {
float shfabs = fabs(src->L[i][j] - tmL[i][j]);
float shmed = 0.0f;
float shmed = 0.f;
for (int i1 = max(0, i - 2); i1 <= min(i + 2, height - 1); i1++) {
for (int j1 = j - 2; j1 < width; j1++) {