xtrans demosaic: 4% speedup and disabled CLIP
This commit is contained in:
parent
db275b1f91
commit
bfc0505320
@ -36,7 +36,8 @@
|
|||||||
#include "procparams.h"
|
#include "procparams.h"
|
||||||
#include "sleef.c"
|
#include "sleef.c"
|
||||||
#include "opthelper.h"
|
#include "opthelper.h"
|
||||||
|
#define BENCHMARK
|
||||||
|
#include "StopWatch.h"
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#endif
|
#endif
|
||||||
@ -3886,12 +3887,12 @@ const float d65_white[3] = { 0.950456, 1, 1.088754 };
|
|||||||
|
|
||||||
void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b, const int width, const int height, const int labWidth, const float xyz_cam[3][3])
|
void RawImageSource::cielab (const float (*rgb)[3], float* l, float* a, float *b, const int width, const int height, const int labWidth, const float xyz_cam[3][3])
|
||||||
{
|
{
|
||||||
static LUTf cbrt(0x10000);
|
static LUTf cbrt(0x14000);
|
||||||
static bool cbrtinit = false;
|
static bool cbrtinit = false;
|
||||||
|
|
||||||
if (!rgb) {
|
if (!rgb) {
|
||||||
if(!cbrtinit) {
|
if(!cbrtinit) {
|
||||||
for (int i = 0; i < 0x10000; i++) {
|
for (int i = 0; i < 0x14000; i++) {
|
||||||
double r = i / 65535.0;
|
double r = i / 65535.0;
|
||||||
cbrt[i] = r > 0.008856f ? std::cbrt(r) : 7.787f * r + 16.f / 116.f;
|
cbrt[i] = r > 0.008856f ? std::cbrt(r) : 7.787f * r + 16.f / 116.f;
|
||||||
}
|
}
|
||||||
@ -4014,10 +4015,11 @@ void RawImageSource::xtransborder_interpolate (int border)
|
|||||||
Frank Markesteijn's algorithm for Fuji X-Trans sensors
|
Frank Markesteijn's algorithm for Fuji X-Trans sensors
|
||||||
adapted to RT by Ingo Weyrich 2014
|
adapted to RT by Ingo Weyrich 2014
|
||||||
*/
|
*/
|
||||||
|
// override CLIP function to test unclipped output
|
||||||
|
#define CLIP(x) (x)
|
||||||
void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
||||||
{
|
{
|
||||||
|
BENCHFUN
|
||||||
constexpr int ts = 114; /* Tile Size */
|
constexpr int ts = 114; /* Tile Size */
|
||||||
constexpr int tsh = ts / 2; /* half of Tile Size */
|
constexpr int tsh = ts / 2; /* half of Tile Size */
|
||||||
|
|
||||||
@ -4140,6 +4142,7 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
uint8_t (*homo)[ts][ts] = (uint8_t (*)[ts][ts]) (lab); // we can reuse the lab-buffer because they are not used together
|
uint8_t (*homo)[ts][ts] = (uint8_t (*)[ts][ts]) (lab); // we can reuse the lab-buffer because they are not used together
|
||||||
s_minmaxgreen (*greenminmaxtile)[tsh] = (s_minmaxgreen(*)[tsh]) (lab); // we can reuse the lab-buffer because they are not used together
|
s_minmaxgreen (*greenminmaxtile)[tsh] = (s_minmaxgreen(*)[tsh]) (lab); // we can reuse the lab-buffer because they are not used together
|
||||||
uint8_t (*homosum)[ts][ts] = (uint8_t (*)[ts][ts]) (drv); // we can reuse the drv-buffer because they are not used together
|
uint8_t (*homosum)[ts][ts] = (uint8_t (*)[ts][ts]) (drv); // we can reuse the drv-buffer because they are not used together
|
||||||
|
uint8_t (*homosummax)[ts] = (uint8_t (*)[ts]) homo[ndir - 1]; // we can reuse the homo-buffer because they are not used together
|
||||||
|
|
||||||
#ifdef _OPENMP
|
#ifdef _OPENMP
|
||||||
#pragma omp for collapse(2) schedule(dynamic) nowait
|
#pragma omp for collapse(2) schedule(dynamic) nowait
|
||||||
@ -4284,9 +4287,13 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int col = leftstart; col < mcol; col += coloffset, coloffset ^= 3) {
|
short *hexmod[2];
|
||||||
|
hexmod[0] = allhex[0][row % 3][leftstart % 3];
|
||||||
|
hexmod[1] = allhex[0][row % 3][(leftstart + coloffset) % 3];
|
||||||
|
|
||||||
|
for (int col = leftstart, hexindex = 0; col < mcol; col += coloffset, coloffset ^= 3, hexindex ^= 1) {
|
||||||
float *pix = &rawData[row][col];
|
float *pix = &rawData[row][col];
|
||||||
short *hex = allhex[0][row % 3][col % 3];
|
short *hex = hexmod[hexindex];
|
||||||
float color[4];
|
float color[4];
|
||||||
color[0] = 0.6796875f * (pix[hex[1]] + pix[hex[0]]) -
|
color[0] = 0.6796875f * (pix[hex[1]] + pix[hex[0]]) -
|
||||||
0.1796875f * (pix[2 * hex[1]] + pix[2 * hex[0]]);
|
0.1796875f * (pix[2 * hex[1]] + pix[2 * hex[0]]);
|
||||||
@ -4335,9 +4342,12 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
int f = fcol(row, leftstart);
|
int f = fcol(row, leftstart);
|
||||||
|
short *hexmod[2];
|
||||||
|
hexmod[0] = allhex[1][row % 3][leftstart % 3];
|
||||||
|
hexmod[1] = allhex[1][row % 3][(leftstart + coloffset) % 3];
|
||||||
|
|
||||||
for (int col = leftstart; col < mcol - 2; col += coloffset, coloffset ^= 3, f = f ^ (coloffset & 2) ) {
|
for (int col = leftstart, hexindex = 0; col < mcol - 2; col += coloffset, coloffset ^= 3, f = f ^ (coloffset & 2), hexindex ^= 1 ) {
|
||||||
short *hex = allhex[1][row % 3][col % 3];
|
short *hex = hexmod[hexindex];
|
||||||
|
|
||||||
for (int d = 3; d < 6; d++) {
|
for (int d = 3; d < 6; d++) {
|
||||||
float (*rix)[3] = &rgb[(d - 2) ^ 1][row - top][col - left];
|
float (*rix)[3] = &rgb[(d - 2) ^ 1][row - top][col - left];
|
||||||
@ -4448,9 +4458,13 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
|
|
||||||
for (int row = topstart; row < mrow - 2; row++) {
|
for (int row = topstart; row < mrow - 2; row++) {
|
||||||
if ((row - sgrow) % 3) {
|
if ((row - sgrow) % 3) {
|
||||||
for (int col = leftstart, coloffset = coloffsetstart; col < mcol - 2; col += coloffset, coloffset ^= 3) {
|
short *hexmod[2];
|
||||||
|
hexmod[0] = allhex[1][row % 3][leftstart % 3];
|
||||||
|
hexmod[1] = allhex[1][row % 3][(leftstart + coloffsetstart) % 3];
|
||||||
|
|
||||||
|
for (int col = leftstart, coloffset = coloffsetstart, hexindex = 0; col < mcol - 2; col += coloffset, coloffset ^= 3, hexindex ^= 1) {
|
||||||
float (*rix)[3] = &rgb[0][row - top][col - left];
|
float (*rix)[3] = &rgb[0][row - top][col - left];
|
||||||
short *hex = allhex[1][row % 3][col % 3];
|
short *hex = hexmod[hexindex];
|
||||||
|
|
||||||
for (int d = 0; d < ndir; d += 2, rix += ts * ts) {
|
for (int d = 0; d < ndir; d += 2, rix += ts * ts) {
|
||||||
if (hex[d] + hex[d + 1]) {
|
if (hex[d] + hex[d + 1]) {
|
||||||
@ -4582,7 +4596,7 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
for (int d = 0; d < ndir; d++)
|
for (int d = 0; d < ndir; d++)
|
||||||
for (int v = -1; v <= 1; v++)
|
for (int v = -1; v <= 1; v++)
|
||||||
for (int h = -1; h <= 1; h++) {
|
for (int h = -1; h <= 1; h++) {
|
||||||
homo[d][row][col] += (drv[d][row + v - 5][col + h - 5] <= tr ? 1 : 0) ;
|
homo[d][row][col] += (drv[d][row + v - 5][col + h - 5] <= tr ? 1 : 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4643,21 +4657,63 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Average the most homogenous pixels for the final result: */
|
// calculate maximum of homogeneity maps per pixel. Vectorized calculation is a tiny bit faster than on the fly calculation in next step
|
||||||
|
#ifdef __SSE2__
|
||||||
|
vint maskv = _mm_set1_epi8(31);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (int row = MIN(top, 8); row < mrow - 8; row++) {
|
||||||
|
int col = startcol;
|
||||||
|
#ifdef __SSE2__
|
||||||
|
int endcol = row < mrow - 9 ? mcol - 8 : mcol - 23;
|
||||||
|
|
||||||
|
for (; col < endcol; col += 16) {
|
||||||
|
vint maxval1 = _mm_max_epu8(_mm_loadu_si128((vint*)&homosum[0][row][col]), _mm_loadu_si128((vint*)&homosum[1][row][col]));
|
||||||
|
vint maxval2 = _mm_max_epu8(_mm_loadu_si128((vint*)&homosum[2][row][col]), _mm_loadu_si128((vint*)&homosum[3][row][col]));
|
||||||
|
|
||||||
|
if(ndir > 4) {
|
||||||
|
vint maxval3 = _mm_max_epu8(_mm_loadu_si128((vint*)&homosum[4][row][col]), _mm_loadu_si128((vint*)&homosum[5][row][col]));
|
||||||
|
vint maxval4 = _mm_max_epu8(_mm_loadu_si128((vint*)&homosum[6][row][col]), _mm_loadu_si128((vint*)&homosum[7][row][col]));
|
||||||
|
maxval1 = _mm_max_epu8(maxval1, maxval3);
|
||||||
|
maxval1 = _mm_max_epu8(maxval1, maxval4);
|
||||||
|
}
|
||||||
|
|
||||||
|
maxval1 = _mm_max_epu8(maxval1, maxval2);
|
||||||
|
// there is no shift intrinsic for epu8. Shift using epi32 and mask the wrong bits out
|
||||||
|
vint subv = _mm_srli_epi32( maxval1, 3 );
|
||||||
|
subv = _mm_and_si128(subv, maskv);
|
||||||
|
maxval1 = _mm_subs_epu8(maxval1, subv);
|
||||||
|
_mm_storeu_si128((vint*)&homosummax[row][col], maxval1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; col < mcol - 8; col ++) {
|
||||||
|
uint8_t maxval = homosum[0][row][col];
|
||||||
|
|
||||||
|
for(int d = 1; d < ndir; d++) {
|
||||||
|
maxval = maxval < homosum[d][row][col] ? homosum[d][row][col] : maxval;
|
||||||
|
}
|
||||||
|
|
||||||
|
maxval -= maxval >> 3;
|
||||||
|
homosummax[row][col] = maxval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Average the most homogeneous pixels for the final result: */
|
||||||
|
uint8_t hm[8];
|
||||||
|
|
||||||
for (int row = MIN(top, 8); row < mrow - 8; row++)
|
for (int row = MIN(top, 8); row < mrow - 8; row++)
|
||||||
for (int col = MIN(left, 8); col < mcol - 8; col++) {
|
for (int col = MIN(left, 8); col < mcol - 8; col++) {
|
||||||
uint8_t hm[8];
|
|
||||||
uint8_t maxval = 0;
|
|
||||||
int d = 0;
|
int d = 0;
|
||||||
|
|
||||||
for (; d < 4; d++) {
|
for (; d < 4; d++) {
|
||||||
hm[d] = homosum[d][row][col];
|
hm[d] = homosum[d][row][col];
|
||||||
maxval = (maxval < hm[d] ? hm[d] : maxval);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; d < ndir; d++) {
|
for (; d < ndir; d++) {
|
||||||
hm[d] = homosum[d][row][col];
|
hm[d] = homosum[d][row][col];
|
||||||
maxval = (maxval < hm[d] ? hm[d] : maxval);
|
|
||||||
|
|
||||||
if (hm[d - 4] < hm[d]) {
|
if (hm[d - 4] < hm[d]) {
|
||||||
hm[d - 4] = 0;
|
hm[d - 4] = 0;
|
||||||
@ -4666,9 +4722,10 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
maxval -= maxval >> 3;
|
|
||||||
float avg[4] = {0.f};
|
float avg[4] = {0.f};
|
||||||
|
|
||||||
|
uint8_t maxval = homosummax[row][col];
|
||||||
|
|
||||||
for (d = 0; d < ndir; d++)
|
for (d = 0; d < ndir; d++)
|
||||||
if (hm[d] >= maxval) {
|
if (hm[d] >= maxval) {
|
||||||
FORC3 avg[c] += rgb[d][row][col][c];
|
FORC3 avg[c] += rgb[d][row][col][c];
|
||||||
@ -4698,7 +4755,7 @@ void RawImageSource::xtrans_interpolate (const int passes, const bool useCieLab)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#undef CLIP
|
||||||
void RawImageSource::fast_xtrans_interpolate ()
|
void RawImageSource::fast_xtrans_interpolate ()
|
||||||
{
|
{
|
||||||
if (settings->verbose) {
|
if (settings->verbose) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user