rcd_demosaic: further speedup
This commit is contained in:
@@ -51,7 +51,7 @@ void RawImageSource::rcd_demosaic()
|
|||||||
|
|
||||||
const int width = W, height = H;
|
const int width = W, height = H;
|
||||||
constexpr int tileBorder = 8;
|
constexpr int tileBorder = 8;
|
||||||
constexpr int tileSize = 228;
|
constexpr int tileSize = 220;
|
||||||
constexpr int tileSizeN = tileSize - 2 * tileBorder;
|
constexpr int tileSizeN = tileSize - 2 * tileBorder;
|
||||||
const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0);
|
const int numTh = H / (tileSizeN) + ((H % (tileSizeN)) ? 1 : 0);
|
||||||
const int numTw = W / (tileSizeN) + ((W % (tileSizeN)) ? 1 : 0);
|
const int numTw = W / (tileSizeN) + ((W % (tileSizeN)) ? 1 : 0);
|
||||||
@@ -101,10 +101,11 @@ void RawImageSource::rcd_demosaic()
|
|||||||
|
|
||||||
for (int row = 4; row < tileSize - 4; row++) {
|
for (int row = 4; row < tileSize - 4; row++) {
|
||||||
for (int col = 4, indx = row * tileSize + col; col < tileSize - 4; col++, indx++) {
|
for (int col = 4, indx = row * tileSize + col; col < tileSize - 4; col++, indx++) {
|
||||||
|
const float cfai = cfa[indx];
|
||||||
//Calculate h/v local discrimination
|
//Calculate h/v local discrimination
|
||||||
float V_Stat = max(epssq, - 18.0f * cfa[indx] * cfa[indx - w1] - 18.0f * cfa[indx] * cfa[indx + w1] - 36.0f * cfa[indx] * cfa[indx - w2] - 36.0f * cfa[indx] * cfa[indx + w2] + 18.0f * cfa[indx] * cfa[indx - w3] + 18.0f * cfa[indx] * cfa[indx + w3] - 2.0f * cfa[indx] * cfa[indx - w4] - 2.0f * cfa[indx] * cfa[indx + w4] + 38.0f * cfa[indx] * cfa[indx] - 70.0f * cfa[indx - w1] * cfa[indx + w1] - 12.0f * cfa[indx - w1] * cfa[indx - w2] + 24.0f * cfa[indx - w1] * cfa[indx + w2] - 38.0f * cfa[indx - w1] * cfa[indx - w3] + 16.0f * cfa[indx - w1] * cfa[indx + w3] + 12.0f * cfa[indx - w1] * cfa[indx - w4] - 6.0f * cfa[indx - w1] * cfa[indx + w4] + 46.0f * cfa[indx - w1] * cfa[indx - w1] + 24.0f * cfa[indx + w1] * cfa[indx - w2] - 12.0f * cfa[indx + w1] * cfa[indx + w2] + 16.0f * cfa[indx + w1] * cfa[indx - w3] - 38.0f * cfa[indx + w1] * cfa[indx + w3] - 6.0f * cfa[indx + w1] * cfa[indx - w4] + 12.0f * cfa[indx + w1] * cfa[indx + w4] + 46.0f * cfa[indx + w1] * cfa[indx + w1] + 14.0f * cfa[indx - w2] * cfa[indx + w2] - 12.0f * cfa[indx - w2] * cfa[indx + w3] - 2.0f * cfa[indx - w2] * cfa[indx - w4] + 2.0f * cfa[indx - w2] * cfa[indx + w4] + 11.0f * cfa[indx - w2] * cfa[indx - w2] - 12.0f * cfa[indx + w2] * cfa[indx - w3] + 2.0f * cfa[indx + w2] * cfa[indx - w4] - 2.0f * cfa[indx + w2] * cfa[indx + w4] + 11.0f * cfa[indx + w2] * cfa[indx + w2] + 2.0f * cfa[indx - w3] * cfa[indx + w3] - 6.0f * cfa[indx - w3] * cfa[indx - w4] + 10.0f * cfa[indx - w3] * cfa[indx - w3] - 6.0f * cfa[indx + w3] * cfa[indx + w4] + 10.0f * cfa[indx + w3] * cfa[indx + w3] + 1.0f * cfa[indx - w4] * cfa[indx - w4] + 1.0f * cfa[indx + w4] * cfa[indx + w4]);
|
float V_Stat = max(epssq, - 18.f * cfai * (cfa[indx - w1] + cfa[indx + w1] + 2.f * (cfa[indx - w2] + cfa[indx + w2]) - cfa[indx - w3] - cfa[indx + w3]) - 2.f * cfai * (cfa[indx - w4] + cfa[indx + w4] - 19.f * cfai) - cfa[indx - w1] * (70.f * cfa[indx + w1] + 12.f * cfa[indx - w2] - 24.f * cfa[indx + w2] + 38.f * cfa[indx - w3] - 16.f * cfa[indx + w3] - 12.f * cfa[indx - w4] + 6.f * cfa[indx + w4] - 46.f * cfa[indx - w1]) + cfa[indx + w1] * (24.f * cfa[indx - w2] - 12.f * cfa[indx + w2] + 16.f * cfa[indx - w3] - 38.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 12.f * cfa[indx + w4] + 46.f * cfa[indx + w1]) + cfa[indx - w2] * (14.f * cfa[indx + w2] - 12.f * cfa[indx + w3] - 2.f * cfa[indx - w4] + 2.f * cfa[indx + w4] + 11.f * cfa[indx - w2]) + cfa[indx + w2] * (-12.f * cfa[indx - w3] + 2.f * cfa[indx - w4] - 2.f * cfa[indx + w4] + 11.f * cfa[indx + w2]) + cfa[indx - w3] * (2.f * cfa[indx + w3] - 6.f * cfa[indx - w4] + 10.f * cfa[indx - w3]) + cfa[indx + w3] * (-6.f * cfa[indx + w4] + 10.f * cfa[indx + w3]) + cfa[indx - w4] * cfa[indx - w4] + cfa[indx + w4] * cfa[indx + w4]);
|
||||||
|
|
||||||
float H_Stat = max(epssq, - 18.0f * cfa[indx] * cfa[indx - 1] - 18.0f * cfa[indx] * cfa[indx + 1] - 36.0f * cfa[indx] * cfa[indx - 2] - 36.0f * cfa[indx] * cfa[indx + 2] + 18.0f * cfa[indx] * cfa[indx - 3] + 18.0f * cfa[indx] * cfa[indx + 3] - 2.0f * cfa[indx] * cfa[indx - 4] - 2.0f * cfa[indx] * cfa[indx + 4] + 38.0f * cfa[indx] * cfa[indx] - 70.0f * cfa[indx - 1] * cfa[indx + 1] - 12.0f * cfa[indx - 1] * cfa[indx - 2] + 24.0f * cfa[indx - 1] * cfa[indx + 2] - 38.0f * cfa[indx - 1] * cfa[indx - 3] + 16.0f * cfa[indx - 1] * cfa[indx + 3] + 12.0f * cfa[indx - 1] * cfa[indx - 4] - 6.0f * cfa[indx - 1] * cfa[indx + 4] + 46.0f * cfa[indx - 1] * cfa[indx - 1] + 24.0f * cfa[indx + 1] * cfa[indx - 2] - 12.0f * cfa[indx + 1] * cfa[indx + 2] + 16.0f * cfa[indx + 1] * cfa[indx - 3] - 38.0f * cfa[indx + 1] * cfa[indx + 3] - 6.0f * cfa[indx + 1] * cfa[indx - 4] + 12.0f * cfa[indx + 1] * cfa[indx + 4] + 46.0f * cfa[indx + 1] * cfa[indx + 1] + 14.0f * cfa[indx - 2] * cfa[indx + 2] - 12.0f * cfa[indx - 2] * cfa[indx + 3] - 2.0f * cfa[indx - 2] * cfa[indx - 4] + 2.0f * cfa[indx - 2] * cfa[indx + 4] + 11.0f * cfa[indx - 2] * cfa[indx - 2] - 12.0f * cfa[indx + 2] * cfa[indx - 3] + 2.0f * cfa[indx + 2] * cfa[indx - 4] - 2.0f * cfa[indx + 2] * cfa[indx + 4] + 11.0f * cfa[indx + 2] * cfa[indx + 2] + 2.0f * cfa[indx - 3] * cfa[indx + 3] - 6.0f * cfa[indx - 3] * cfa[indx - 4] + 10.0f * cfa[indx - 3] * cfa[indx - 3] - 6.0f * cfa[indx + 3] * cfa[indx + 4] + 10.0f * cfa[indx + 3] * cfa[indx + 3] + 1.0f * cfa[indx - 4] * cfa[indx - 4] + 1.0f * cfa[indx + 4] * cfa[indx + 4]);
|
float H_Stat = max(epssq, - 18.f * cfai * (cfa[indx - 1] + cfa[indx + 1] + 2.f * (cfa[indx - 2] + cfa[indx + 2]) - cfa[indx - 3] - cfa[indx + 3]) - 2.f * cfai * (cfa[indx - 4] + cfa[indx + 4] - 19.f * cfai) - cfa[indx - 1] * (70.f * cfa[indx + 1] + 12.f * cfa[indx - 2] - 24.f * cfa[indx + 2] + 38.f * cfa[indx - 3] - 16.f * cfa[indx + 3] - 12.f * cfa[indx - 4] + 6.f * cfa[indx + 4] - 46.f * cfa[indx - 1]) + cfa[indx + 1] * (24.f * cfa[indx - 2] - 12.f * cfa[indx + 2] + 16.f * cfa[indx - 3] - 38.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 12.f * cfa[indx + 4] + 46.f * cfa[indx + 1]) + cfa[indx - 2] * (14.f * cfa[indx + 2] - 12.f * cfa[indx + 3] - 2.f * cfa[indx - 4] + 2.f * cfa[indx + 4] + 11.f * cfa[indx - 2]) + cfa[indx + 2] * (-12.f * cfa[indx - 3] + 2.f * cfa[indx - 4] - 2.f * cfa[indx + 4] + 11.f * cfa[indx + 2]) + cfa[indx - 3] * (2.f * cfa[indx + 3] - 6.f * cfa[indx - 4] + 10.f * cfa[indx - 3]) + cfa[indx + 3] * (-6.f * cfa[indx + 4] + 10.f * cfa[indx + 3]) + cfa[indx - 4] * cfa[indx - 4] + cfa[indx + 4] * cfa[indx + 4]);
|
||||||
|
|
||||||
VH_Dir[indx] = V_Stat / (V_Stat + H_Stat);
|
VH_Dir[indx] = V_Stat / (V_Stat + H_Stat);
|
||||||
}
|
}
|
||||||
@@ -216,9 +217,8 @@ void RawImageSource::rcd_demosaic()
|
|||||||
float S1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + w2]);
|
float S1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + w2]);
|
||||||
float W1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx - 2]);
|
float W1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx - 2]);
|
||||||
float E1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + 2]);
|
float E1 = eps + std::fabs(rgb[1][indx] - rgb[1][indx + 2]);
|
||||||
|
|
||||||
for (int c = 0; c <= 2; c += 2) {
|
for (int c = 0; c <= 2; c += 2) {
|
||||||
|
|
||||||
|
|
||||||
// Cardinal gradients
|
// Cardinal gradients
|
||||||
float N_Grad = N1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
|
float N_Grad = N1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx - w1] - rgb[c][indx - w3]);
|
||||||
float S_Grad = S1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);
|
float S_Grad = S1 + std::fabs(rgb[c][indx - w1] - rgb[c][indx + w1]) + std::fabs(rgb[c][indx + w1] - rgb[c][indx + w3]);
|
||||||
|
Reference in New Issue
Block a user