vng4 demosaic: another small speedup, #4633
This commit is contained in:
@@ -22,12 +22,41 @@
|
|||||||
|
|
||||||
#include "rtengine.h"
|
#include "rtengine.h"
|
||||||
#include "rawimagesource.h"
|
#include "rawimagesource.h"
|
||||||
#include "rawimagesource_i.h"
|
|
||||||
#include "../rtgui/multilangmgr.h"
|
#include "../rtgui/multilangmgr.h"
|
||||||
//#define BENCHMARK
|
//#define BENCHMARK
|
||||||
#include "StopWatch.h"
|
#include "StopWatch.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using namespace rtengine;
|
||||||
|
|
||||||
|
// Fills one row of the two non-green output planes from the raw mosaic and
// already-interpolated green rows, using colour-difference (raw - green)
// interpolation.
//
//   ri      - queried through ISBLUE()/ISGREEN() for the colour of each site
//   rawData - raw mosaic values; rows i - 1, i and i + 1 are read
//   ar, ab  - output rows; the callers visible in this file pass the red row
//             as ar and the blue row as ab
//   pg, cg, ng - green rows for i - 1, i and i + 1 (the callers pass
//             green[i - 1], green[i], green[i + 1])
//   i       - row index; because rows i - 1 and i + 1 are read, the caller
//             must keep 1 <= i <= (last row - 1)
//   width   - row width; only columns 3 .. width - 4 are written, the three
//             border columns on each side are left untouched
inline void vng4interpolate_row_redblue (const RawImage *ri, const array2D<float> &rawData, float* ar, float* ab, const float * const pg, const float * const cg, const float * const ng, int i, int width)
|
||||||
|
{
|
||||||
|
// If ri reports a blue site at column 0 or 1 of this row, exchange the two
// output pointers. The swap only affects the local copies of the pointers.
// From here on `ar` is the plane that receives the raw sample at non-green
// sites and `ab` is the other plane.
if (ri->ISBLUE(i, 0) || ri->ISBLUE(i, 1)) {
|
||||||
|
std::swap(ar, ab);
|
||||||
|
}
|
||||||
|
|
||||||
|
// RGRGR or GRGRGR line
|
||||||
|
for (int j = 3; j < width - 3; ++j) {
|
||||||
|
if (!ri->ISGREEN(i, j)) {
|
||||||
|
// keep original value
|
||||||
|
ar[j] = rawData[i][j];
|
||||||
|
// cross interpolation of red/blue
// rb accumulates (raw - green) at the four diagonal neighbours; the
// other colour is the centre green plus the mean of those differences
|
||||||
|
float rb = (rawData[i - 1][j - 1] - pg[j - 1] + rawData[i + 1][j - 1] - ng[j - 1]);
|
||||||
|
rb += (rawData[i - 1][j + 1] - pg[j + 1] + rawData[i + 1][j + 1] - ng[j + 1]);
|
||||||
|
ab[j] = cg[j] + rb * 0.25f;
|
||||||
|
} else {
|
||||||
|
// linear R/B-G interpolation horizontally
// (mean of the left and right neighbours' raw - green differences)
|
||||||
|
ar[j] = cg[j] + (rawData[i][j - 1] - cg[j - 1] + rawData[i][j + 1] - cg[j + 1]) / 2;
|
||||||
|
// linear B/R-G interpolation vertically
// (mean of the upper and lower neighbours' raw - green differences)
|
||||||
|
ab[j] = cg[j] + (rawData[i - 1][j] - pg[j] + rawData[i + 1][j] - ng[j]) / 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
namespace rtengine
|
namespace rtengine
|
||||||
|
|
||||||
{
|
{
|
||||||
#define fc(row,col) (prefilters >> ((((row) << 1 & 14) + ((col) & 1)) << 1) & 3)
|
#define fc(row,col) (prefilters >> ((((row) << 1 & 14) + ((col) & 1)) << 1) & 3)
|
||||||
|
|
||||||
@@ -74,16 +103,6 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
|
|
||||||
float (*image)[4] = (float (*)[4]) calloc (height * width, sizeof * image);
|
float (*image)[4] = (float (*)[4]) calloc (height * width, sizeof * image);
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
#pragma omp parallel for
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (int ii = 0; ii < H; ii++)
|
|
||||||
for (int jj = 0; jj < W; jj++) {
|
|
||||||
image[ii * W + jj][fc(ii, jj)] = rawData[ii][jj];
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
int lcode[16][16][32];
|
int lcode[16][16][32];
|
||||||
float mul[16][16][8];
|
float mul[16][16][8];
|
||||||
float csum[16][16][3];
|
float csum[16][16][3];
|
||||||
@@ -93,8 +112,7 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
for (int col = 0; col < 16; col++) {
|
for (int col = 0; col < 16; col++) {
|
||||||
int * ip = lcode[row][col];
|
int * ip = lcode[row][col];
|
||||||
int mulcount = 0;
|
int mulcount = 0;
|
||||||
float sum[4];
|
float sum[4] = {};
|
||||||
memset (sum, 0, sizeof sum);
|
|
||||||
|
|
||||||
for (int y = -1; y <= 1; y++)
|
for (int y = -1; y <= 1; y++)
|
||||||
for (int x = -1; x <= 1; x++) {
|
for (int x = -1; x <= 1; x++) {
|
||||||
@@ -124,15 +142,30 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp parallel for
|
#pragma omp parallel
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
int firstRow = -1;
|
||||||
|
int lastRow = -1;
|
||||||
|
#ifdef _OPENMP
|
||||||
|
// note, static scheduling is important in this implementation
|
||||||
|
#pragma omp for schedule(static)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int row = 1; row < height - 1; row++) {
|
for (int ii = 0; ii < H; ii++) {
|
||||||
|
if (firstRow == -1) {
|
||||||
|
firstRow = ii;
|
||||||
|
}
|
||||||
|
lastRow = ii;
|
||||||
|
for (int jj = 0; jj < W; jj++) {
|
||||||
|
image[ii * W + jj][fc(ii, jj)] = rawData[ii][jj];
|
||||||
|
}
|
||||||
|
if (ii - 1 > firstRow) {
|
||||||
|
int row = ii - 1;
|
||||||
for (int col = 1; col < width - 1; col++) {
|
for (int col = 1; col < width - 1; col++) {
|
||||||
float * pix = image[row * width + col];
|
float * pix = image[row * width + col];
|
||||||
int * ip = lcode[row & 15][col & 15];
|
int * ip = lcode[row & 15][col & 15];
|
||||||
float sum[4];
|
float sum[4] = {};
|
||||||
memset (sum, 0, sizeof sum);
|
|
||||||
|
|
||||||
for (int i = 0; i < 8; i++, ip += 2) {
|
for (int i = 0; i < 8; i++, ip += 2) {
|
||||||
sum[ip[1]] += pix[ip[0]] * mul[row & 15][col & 15][i];
|
sum[ip[1]] += pix[ip[0]] * mul[row & 15][col & 15][i];
|
||||||
@@ -145,16 +178,52 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int prow = 7, pcol = 1;
|
// now all rows are processed except the first and last row of each chunk
|
||||||
int *code[8][2];
|
// let's process them now but skip row 0 and row H - 1
|
||||||
int t, g;
|
if (firstRow > 0 && firstRow < H - 1) {
|
||||||
int * ip = (int *) calloc ((prow + 1) * (pcol + 1), 1280);
|
const int row = firstRow;
|
||||||
|
for (int col = 1; col < width - 1; col++) {
|
||||||
|
float * pix = image[row * width + col];
|
||||||
|
int * ip = lcode[row & 15][col & 15];
|
||||||
|
float sum[4] = {};
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++, ip += 2) {
|
||||||
|
sum[ip[1]] += pix[ip[0]] * mul[row & 15][col & 15][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < colors - 1; i++, ip++) {
|
||||||
|
pix[ip[0]] = sum[ip[0]] * csum[row & 15][col & 15][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastRow > 0 && lastRow < H - 1) {
|
||||||
|
const int row = lastRow;
|
||||||
|
for (int col = 1; col < width - 1; col++) {
|
||||||
|
float * pix = image[row * width + col];
|
||||||
|
int * ip = lcode[row & 15][col & 15];
|
||||||
|
float sum[4] = {};
|
||||||
|
|
||||||
|
for (int i = 0; i < 8; i++, ip += 2) {
|
||||||
|
sum[ip[1]] += pix[ip[0]] * mul[row & 15][col & 15][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned int i = 0; i < colors - 1; i++, ip++) {
|
||||||
|
pix[ip[0]] = sum[ip[0]] * csum[row & 15][col & 15][i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr int prow = 7, pcol = 1;
|
||||||
|
int32_t *code[8][2];
|
||||||
|
int32_t * ip = (int32_t *) calloc ((prow + 1) * (pcol + 1), 1280);
|
||||||
|
|
||||||
for (int row = 0; row <= prow; row++) /* Precalculate for VNG */
|
for (int row = 0; row <= prow; row++) /* Precalculate for VNG */
|
||||||
for (int col = 0; col <= pcol; col++) {
|
for (int col = 0; col <= pcol; col++) {
|
||||||
code[row][col] = ip;
|
code[row][col] = ip;
|
||||||
|
cp = terms;
|
||||||
for (cp = terms, t = 0; t < 64; t++) {
|
for (int t = 0; t < 64; t++) {
|
||||||
int y1 = *cp++;
|
int y1 = *cp++;
|
||||||
int x1 = *cp++;
|
int x1 = *cp++;
|
||||||
int y2 = *cp++;
|
int y2 = *cp++;
|
||||||
@@ -175,9 +244,13 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
|
|
||||||
*ip++ = (y1 * width + x1) * 4 + color;
|
*ip++ = (y1 * width + x1) * 4 + color;
|
||||||
*ip++ = (y2 * width + x2) * 4 + color;
|
*ip++ = (y2 * width + x2) * 4 + color;
|
||||||
|
#ifdef __SSE2__
|
||||||
|
// at least on machines with SSE2 feature this cast is safe
|
||||||
|
*reinterpret_cast<float*>(ip++) = 1 << weight;
|
||||||
|
#else
|
||||||
*ip++ = 1 << weight;
|
*ip++ = 1 << weight;
|
||||||
|
#endif
|
||||||
for (g = 0; g < 8; g++)
|
for (int g = 0; g < 8; g++)
|
||||||
if (grads & (1 << g)) {
|
if (grads & (1 << g)) {
|
||||||
*ip++ = g;
|
*ip++ = g;
|
||||||
}
|
}
|
||||||
@@ -187,7 +260,8 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
|
|
||||||
*ip++ = INT_MAX;
|
*ip++ = INT_MAX;
|
||||||
|
|
||||||
for (cp = chood, g = 0; g < 8; g++) {
|
cp = chood;
|
||||||
|
for (int g = 0; g < 8; g++) {
|
||||||
int y = *cp++;
|
int y = *cp++;
|
||||||
int x = *cp++;
|
int x = *cp++;
|
||||||
*ip++ = (y * width + x) * 4;
|
*ip++ = (y * width + x) * 4;
|
||||||
@@ -202,7 +276,7 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(plistenerActive) {
|
if(plistenerActive) {
|
||||||
progress = 0.1;
|
progress = 0.2;
|
||||||
plistener->setProgress (progress);
|
plistener->setProgress (progress);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,7 +285,7 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
constexpr int progressStep = 64;
|
constexpr int progressStep = 64;
|
||||||
const double progressInc = (0.98 - progress) / ((height - 2) / progressStep);
|
const double progressInc = (1.0 - progress) / ((height - 2) / progressStep);
|
||||||
int firstRow = -1;
|
int firstRow = -1;
|
||||||
int lastRow = -1;
|
int lastRow = -1;
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
@@ -227,11 +301,16 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
for (int col = 2; col < width - 2; col++) {
|
for (int col = 2; col < width - 2; col++) {
|
||||||
float * pix = image[row * width + col];
|
float * pix = image[row * width + col];
|
||||||
int color = fc(row, col);
|
int color = fc(row, col);
|
||||||
int * ip = code[row & prow][col & pcol];
|
int32_t * ip = code[row & prow][col & pcol];
|
||||||
float gval[8] = {};
|
float gval[8] = {};
|
||||||
|
|
||||||
while (ip[0] != INT_MAX) { /* Calculate gradients */
|
while (ip[0] != INT_MAX) { /* Calculate gradients */
|
||||||
|
#ifdef __SSE2__
|
||||||
|
// at least on machines with SSE2 feature this cast is safe and saves a lot of int => float conversions
|
||||||
|
const float diff = std::fabs(pix[ip[0]] - pix[ip[1]]) * reinterpret_cast<float*>(ip)[2];
|
||||||
|
#else
|
||||||
const float diff = std::fabs(pix[ip[0]] - pix[ip[1]]) * ip[2];
|
const float diff = std::fabs(pix[ip[0]] - pix[ip[1]]) * ip[2];
|
||||||
|
#endif
|
||||||
gval[ip[3]] += diff;
|
gval[ip[3]] += diff;
|
||||||
ip += 5;
|
ip += 5;
|
||||||
if (UNLIKELY(ip[-1] != -1)) {
|
if (UNLIKELY(ip[-1] != -1)) {
|
||||||
@@ -246,17 +325,18 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
|
|
||||||
float sum0 = 0.f;
|
float sum0 = 0.f;
|
||||||
float sum1 = 0.f;
|
float sum1 = 0.f;
|
||||||
float greenval = pix[color];
|
const float greenval = pix[color];
|
||||||
int num = 0;
|
int num = 0;
|
||||||
|
|
||||||
if(color & 1) {
|
if(color & 1) {
|
||||||
|
color ^= 2;
|
||||||
for (int g = 0; g < 8; g++, ip += 2) { /* Average the neighbors */
|
for (int g = 0; g < 8; g++, ip += 2) { /* Average the neighbors */
|
||||||
if (gval[g] <= thold) {
|
if (gval[g] <= thold) {
|
||||||
if(ip[1]) {
|
if(ip[1]) {
|
||||||
sum0 += greenval + pix[ip[1]];
|
sum0 += greenval + pix[ip[1]];
|
||||||
}
|
}
|
||||||
|
|
||||||
sum1 += pix[ip[0] + (color ^ 2)];
|
sum1 += pix[ip[0] + color];
|
||||||
num++;
|
num++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -276,7 +356,7 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
green[row][col] = greenval + (sum1 - sum0) / (2 * num);
|
green[row][col] = greenval + (sum1 - sum0) / (2 * num);
|
||||||
}
|
}
|
||||||
if (row - 1 > firstRow) {
|
if (row - 1 > firstRow) {
|
||||||
interpolate_row_rb_mul_pp(rawData, red[row - 1], blue[row - 1], green[row - 2], green[row - 1], green[row], row - 1, 1.0, 1.0, 1.0, 0, W, 1);
|
vng4interpolate_row_redblue(ri, rawData, red[row - 1], blue[row - 1], green[row - 2], green[row - 1], green[row], row - 1, W);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(plistenerActive) {
|
if(plistenerActive) {
|
||||||
@@ -292,11 +372,11 @@ void RawImageSource::vng4_demosaic (const array2D<float> &rawData, array2D<float
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (firstRow > 2 && firstRow < H - 3) {
|
if (firstRow > 2 && firstRow < H - 3) {
|
||||||
interpolate_row_rb_mul_pp(rawData, red[firstRow], blue[firstRow], green[firstRow - 1], green[firstRow], green[firstRow + 1], firstRow, 1.0, 1.0, 1.0, 0, W, 1);
|
vng4interpolate_row_redblue(ri, rawData, red[firstRow], blue[firstRow], green[firstRow - 1], green[firstRow], green[firstRow + 1], firstRow, W);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lastRow > 2 && lastRow < H - 3) {
|
if (lastRow > 2 && lastRow < H - 3) {
|
||||||
interpolate_row_rb_mul_pp(rawData, red[lastRow], blue[lastRow], green[lastRow - 1], green[lastRow], green[lastRow + 1], lastRow, 1.0, 1.0, 1.0, 0, W, 1);
|
vng4interpolate_row_redblue(ri, rawData, red[lastRow], blue[lastRow], green[lastRow - 1], green[lastRow], green[lastRow + 1], lastRow, W);
|
||||||
}
|
}
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp single
|
#pragma omp single
|
||||||
|
Reference in New Issue
Block a user