rcd_demosaic: further speedup

This commit is contained in:
heckflosse
2018-02-28 14:53:02 +01:00
parent bad28bb0ad
commit 2fb4403823

View File

@@ -51,7 +51,7 @@ void RawImageSource::rcd_demosaic()
const int width = W, height = H;
constexpr int tileBorder = 8;
constexpr int tileSize = 228;
constexpr int tileSize = 220;
constexpr int tileSizeN = tileSize - 2 * tileBorder;
const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0);
const int numTw = W / (tileSizeN) + ((W % (tileSizeN)) ? 1 : 0);
@@ -101,10 +101,11 @@ void RawImageSource::rcd_demosaic()
for (int row = 4; row < tileSize - 4; row++) {
for (int col = 4, indx = row * tileSize + col; col < tileSize - 4; col++, indx++) {
const float cfai = cfa[indx];
// Calculate horizontal/vertical local discrimination: VH_Dir = V_Stat / (V_Stat + H_Stat)
float V_Stat = max(epssq, - 18.0f * cfa[indx] * cfa[indx - w1] - 18.0f * cfa[indx] * cfa[indx + w1] - 36.0f * cfa[indx] * cfa[indx - w2] - 36.0f * cfa[indx] * cfa[indx + w2] + 18.0f * cfa[indx] * cfa[indx - w3] + 18.0f * cfa[indx] * cfa[indx + w3] - 2.0f * cfa[indx] * cfa[indx - w4] - 2.0f * cfa[indx] * cfa[indx + w4] + 38.0f * cfa[indx] * cfa[indx] - 70.0f * cfa[indx - w1] * cfa[indx + w1] - 12.0f * cfa[indx - w1] * cfa[indx - w2] + 24.0f * cfa[indx - w1] * cfa[indx + w2] - 38.0f * cfa[indx - w1] * cfa[indx - w3] + 16.0f * cfa[indx - w1] * cfa[indx + w3] + 12.0f * cfa[indx - w1] * cfa[indx - w4] - 6.0f * cfa[indx - w1] * cfa[indx + w4] + 46.0f * cfa[indx - w1] * cfa[indx - w1] + 24.0f * cfa[indx + w1] * cfa[indx - w2] - 12.0f * cfa[indx + w1] * cfa[indx + w2] + 16.0f * cfa[indx + w1] * cfa[indx - w3] - 38.0f * cfa[indx + w1] * cfa[indx + w3] - 6.0f * cfa[indx + w1] * cfa[indx - w4] + 12.0f * cfa[indx + w1] * cfa[indx + w4] + 46.0f * cfa[indx + w1] * cfa[indx + w1] + 14.0f * cfa[indx - w2] * cfa[indx + w2] - 12.0f * cfa[indx - w2] * cfa[indx + w3] - 2.0f * cfa[indx - w2] * cfa[indx - w4] + 2.0f * cfa[indx - w2] * cfa[indx + w4] + 11.0f * cfa[indx - w2] * cfa[indx - w2] - 12.0f * cfa[indx + w2] * cfa[indx - w3] + 2.0f * cfa[indx + w2] * cfa[indx - w4] - 2.0f * cfa[indx + w2] * cfa[indx + w4] + 11.0f * cfa[indx + w2] * cfa[indx + w2] + 2.0f * cfa[indx - w3] * cfa[indx + w3] - 6.0f * cfa[indx - w3] * cfa[indx - w4] + 10.0f * cfa[indx - w3] * cfa[indx - w3] - 6.0f * cfa[indx + w3] * cfa[indx + w4] + 10.0f * cfa[indx + w3] * cfa[indx + w3] + 1.0f * cfa[indx - w4] * cfa[indx - w4] + 1.0f * cfa[indx + w4] * cfa[indx + w4]);
float V_Stat = max(epssq, - 18.f * cfai * (cfa[indx - w1] + cfa[indx + w1] + 2.f * (cfa[indx - w2] + cfa[indx + w2]) - cfa[indx - w3] - cfa[indx + w3]) - 2.f * cfai * (cfa[indx - w4] + cfa[indx + w4] - 19.f * cfai) - cfa[indx - w1] * (70.f * cfa[indx + w1] + 12.f * cfa[indx - w2] - 24.f * cfa[indx + w2] + 38.f * cfa[indx - w3] - 16.f * cfa[indx + w3] - 12.f * cfa[indx - w4] + 6.f * cfa[indx + w4] - 46.f * cfa[indx - w1]) + cfa[indx + w1] * (24.f * cfa[indx - w2] - 12.f * cfa[indx + w2] + 16.f * cfa[indx - w3] - 38.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 12.f * cfa[indx + w4] + 46.f * cfa[indx + w1]) + cfa[indx - w2] * (14.f * cfa[indx + w2] - 12.f * cfa[indx + w3] - 2.f * cfa[indx - w4] + 2.f * cfa[indx + w4] + 11.f * cfa[indx - w2]) + cfa[indx + w2] * (-12.f * cfa[indx - w3] + 2.f * cfa[indx - w4] - 2.f * cfa[indx + w4] + 11.f * cfa[indx + w2]) + cfa[indx - w3] * (2.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 10.f * cfa[indx - w3]) + cfa[indx + w3] * (-6.f * cfa[indx + w4] + 10.f * cfa[indx + w3]) + cfa[indx - w4] * cfa[indx - w4] + cfa[indx + w4] * cfa[indx + w4]);
float H_Stat = max(epssq, - 18.0f * cfa[indx] * cfa[indx - 1] - 18.0f * cfa[indx] * cfa[indx + 1] - 36.0f * cfa[indx] * cfa[indx - 2] - 36.0f * cfa[indx] * cfa[indx + 2] + 18.0f * cfa[indx] * cfa[indx - 3] + 18.0f * cfa[indx] * cfa[indx + 3] - 2.0f * cfa[indx] * cfa[indx - 4] - 2.0f * cfa[indx] * cfa[indx + 4] + 38.0f * cfa[indx] * cfa[indx] - 70.0f * cfa[indx - 1] * cfa[indx + 1] - 12.0f * cfa[indx - 1] * cfa[indx - 2] + 24.0f * cfa[indx - 1] * cfa[indx + 2] - 38.0f * cfa[indx - 1] * cfa[indx - 3] + 16.0f * cfa[indx - 1] * cfa[indx + 3] + 12.0f * cfa[indx - 1] * cfa[indx - 4] - 6.0f * cfa[indx - 1] * cfa[indx + 4] + 46.0f * cfa[indx - 1] * cfa[indx - 1] + 24.0f * cfa[indx + 1] * cfa[indx - 2] - 12.0f * cfa[indx + 1] * cfa[indx + 2] + 16.0f * cfa[indx + 1] * cfa[indx - 3] - 38.0f * cfa[indx + 1] * cfa[indx + 3] - 6.0f * cfa[indx + 1] * cfa[indx - 4] + 12.0f * cfa[indx + 1] * cfa[indx + 4] + 46.0f * cfa[indx + 1] * cfa[indx + 1] + 14.0f * cfa[indx - 2] * cfa[indx + 2] - 12.0f * cfa[indx - 2] * cfa[indx + 3] - 2.0f * cfa[indx - 2] * cfa[indx - 4] + 2.0f * cfa[indx - 2] * cfa[indx + 4] + 11.0f * cfa[indx - 2] * cfa[indx - 2] - 12.0f * cfa[indx + 2] * cfa[indx - 3] + 2.0f * cfa[indx + 2] * cfa[indx - 4] - 2.0f * cfa[indx + 2] * cfa[indx + 4] + 11.0f * cfa[indx + 2] * cfa[indx + 2] + 2.0f * cfa[indx - 3] * cfa[indx + 3] - 6.0f * cfa[indx - 3] * cfa[indx - 4] + 10.0f * cfa[indx - 3] * cfa[indx - 3] - 6.0f * cfa[indx + 3] * cfa[indx + 4] + 10.0f * cfa[indx + 3] * cfa[indx + 3] + 1.0f * cfa[indx - 4] * cfa[indx - 4] + 1.0f * cfa[indx + 4] * cfa[indx + 4]);
float H_Stat = max(epssq, - 18.f * cfai * (cfa[indx - 1] + cfa[indx + 1] + 2.f * (cfa[indx - 2] + cfa[indx + 2]) - cfa[indx - 3] - cfa[indx + 3]) - 2.f * cfai * (cfa[indx - 4] + cfa[indx + 4] - 19.f * cfai) - cfa[indx - 1] * (70.f * cfa[indx + 1] + 12.f * cfa[indx - 2] - 24.f * cfa[indx + 2] + 38.f * cfa[indx - 3] - 16.f * cfa[indx + 3] - 12.f * cfa[indx - 4] + 6.f * cfa[indx + 4] - 46.f * cfa[indx - 1]) + cfa[indx + 1] * (24.f * cfa[indx - 2] - 12.f * cfa[indx + 2] + 16.f * cfa[indx - 3] - 38.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 12.f * cfa[indx + 4] + 46.f * cfa[indx + 1]) + cfa[indx - 2] * (14.f * cfa[indx + 2] - 12.f * cfa[indx + 3] - 2.f * cfa[indx - 4] + 2.f * cfa[indx + 4] + 11.f * cfa[indx - 2]) + cfa[indx + 2] * (-12.f * cfa[indx - 3] + 2.f * cfa[indx - 4] - 2.f * cfa[indx + 4] + 11.f * cfa[indx + 2]) + cfa[indx - 3] * (2.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 10.f * cfa[indx - 3]) + cfa[indx + 3] * (-6.f * cfa[indx + 4] + 10.f * cfa[indx + 3]) + cfa[indx - 4] * cfa[indx - 4] + cfa[indx + 4] * cfa[indx + 4]);
VH_Dir[indx] = V_Stat / (V_Stat + H_Stat);
}
@@ -216,9 +217,8 @@ void RawImageSource::rcd_demosaic()
float S1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + w2]);
float W1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx - 2]);
float E1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + 2]);
for (int c = 0; c <= 2; c += 2) {
// Cardinal gradients
float N_Grad = N1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
float S_Grad = S1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);