Initial commit of the 32-bit TIFF file support, see issue 1575
rtengine/EdgePreservingDecomposition.cc (new file, 492 lines)
@@ -0,0 +1,492 @@
#include <cmath>
#include <cstdio>  //printf, fprintf
#include <cstring> //memset, memcpy
#include "rt_math.h"
#include "EdgePreservingDecomposition.h"
#ifdef _OPENMP
#include <omp.h>
#endif

/* Solves A x = b by the conjugate gradient method, where instead of feeding it the matrix A you feed it a function which
calculates A x where x is some vector. Stops when rms residual < RMSResidual or when maximum iterates is reached.
Stops at n iterates if MaximumIterates = 0 since that many iterates gives exact solution. Applicable to symmetric positive
definite problems only, which is what unconstrained smooth optimization pretty much always is.
Parameter Pass can be passed through, containing whatever info you like it to contain (matrix info?).
Takes less memory with OkToModify_b = true, and Preconditioner = NULL. */
float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), float *b, unsigned int n, bool OkToModify_b,
    float *x, float RMSResidual, void *Pass, unsigned int MaximumIterates, void Preconditioner(float *Product, float *x, void *Pass)){
    unsigned int iterate, i;

    float *r = new float[n];

    //Start r and x.
    if(x == NULL){
        x = new float[n];
        memset(x, 0, sizeof(float)*n); //Zero initial guess if x == NULL.
        memcpy(r, b, sizeof(float)*n);
    }else{
        Ax(r, x, Pass);
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10)
#endif
        for(int ii = 0; ii < n; ii++) r[ii] = b[ii] - r[ii]; //r = b - A x.
    }
    //s is the preconditioned r. Without a preconditioner, s simply points to r.
    float *s = r, rs = 0.0f, fp = 0.f;
    if(Preconditioner != NULL){
        s = new float[n];
        Preconditioner(s, r, Pass);
    }
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10) firstprivate(fp) reduction(+:rs)
#endif
    for(int ii = 0; ii < n; ii++) {
        fp = r[ii]*s[ii];
        rs = rs + fp;
    }
    //Search direction d.
    float *d = new float[n];
    memcpy(d, s, sizeof(float)*n);

    //Store calculations of Ax in this.
    float *ax = b;
    if(!OkToModify_b) ax = new float[n];

    //Start iterating!
    if(MaximumIterates == 0) MaximumIterates = n;
    for(iterate = 0; iterate < MaximumIterates; iterate++){
        //Get step size alpha, store ax while at it.
        float ab = 0.0f;
        Ax(ax, d, Pass);
        for(int ii = 0; ii < n; ii++) ab += d[ii]*ax[ii];

        if(ab == 0.0f) break; //So unlikely. It means perfectly converged or singular, stop either way.
        ab = rs/ab;

        //Update x and r with this step size.
        float rms = 0.0f;
        for(int ii = 0; ii < n; ii++){
            x[ii] += ab*d[ii];
            r[ii] -= ab*ax[ii]; //"Fast recursive formula", use explicit r = b - Ax occasionally?
            rms += r[ii]*r[ii];
        }
        rms = sqrtf(rms/n);
        //printf("%f\n", rms);
        //Quit? This probably isn't the best stopping condition, but ok.
        if(rms < RMSResidual) break;

        if(Preconditioner != NULL) Preconditioner(s, r, Pass);

        //Get beta.
        ab = rs;
        rs = 0.0f;
        for(int ii = 0; ii < n; ii++) rs += r[ii]*s[ii];
        ab = rs/ab;

        //Update search direction d.
        for(int ii = 0; ii < n; ii++) d[ii] = s[ii] + ab*d[ii];
    }

    if(iterate == MaximumIterates)
        if(iterate != n && RMSResidual != 0.0f)
            printf("Warning: MaximumIterates (%u) reached in SparseConjugateGradient.\n", MaximumIterates);

    if(ax != b) delete[] ax;
    if(s != r) delete[] s;
    delete[] r;
    delete[] d;
    return x;
}
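/* For reference, a minimal way to drive this solver could look like the sketch below; the
   TridiagonalPass struct and TridiagonalAx callback are illustrative names, not part of this file.
   It solves a small symmetric positive definite tridiagonal system with no preconditioner.

    struct TridiagonalPass { unsigned int n; };

    //Product = A x for A = tridiag(-1, 2, -1), a classic SPD test matrix.
    void TridiagonalAx(float *Product, float *x, void *Pass){
        unsigned int n = ((TridiagonalPass *)Pass)->n;
        for(unsigned int i = 0; i < n; i++){
            Product[i] = 2.0f*x[i];
            if(i > 0) Product[i] -= x[i - 1];
            if(i + 1 < n) Product[i] -= x[i + 1];
        }
    }

    void Example(void){
        const unsigned int n = 5;
        float b[n] = {1.0f, 0.0f, 0.0f, 0.0f, 1.0f};
        TridiagonalPass pass = {n};
        //b must not be modified (false), no initial guess (NULL), stop at rms residual 1e-6 or n iterates.
        float *x = SparseConjugateGradient(TridiagonalAx, b, n, false, NULL, 1e-6f, &pass, 0, NULL);
        //... use x ...
        delete[] x;
    }
*/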

MultiDiagonalSymmetricMatrix::MultiDiagonalSymmetricMatrix(unsigned int Dimension, unsigned int NumberOfDiagonalsInLowerTriangle){
    n = Dimension;
    m = NumberOfDiagonalsInLowerTriangle;
    IncompleteCholeskyFactorization = NULL;

    Diagonals = new float *[m];
    StartRows = new unsigned int [m];
    memset(Diagonals, 0, sizeof(float *)*m);
    memset(StartRows, 0, sizeof(unsigned int)*m);
}

MultiDiagonalSymmetricMatrix::~MultiDiagonalSymmetricMatrix(){
    for(unsigned int i = 0; i != m; i++) delete[] Diagonals[i];
    delete[] Diagonals;
    delete[] StartRows;
}

bool MultiDiagonalSymmetricMatrix::CreateDiagonal(unsigned int index, unsigned int StartRow){
    if(index >= m){
        printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: invalid index.\n");
        return false;
    }
    if(index > 0)
        if(StartRow <= StartRows[index - 1]){
            printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: each StartRow must exceed the previous.\n");
            return false;
        }

    delete[] Diagonals[index];
    Diagonals[index] = new float[DiagonalLength(StartRow)];
    if(Diagonals[index] == NULL){
        printf("Error in MultiDiagonalSymmetricMatrix::CreateDiagonal: memory allocation failed. Out of memory?\n");
        return false;
    }

    StartRows[index] = StartRow;
    memset(Diagonals[index], 0, sizeof(float)*DiagonalLength(StartRow));
    return true;
}

int MultiDiagonalSymmetricMatrix::FindIndex(unsigned int StartRow){
    //There's GOT to be a better way to do this. "Bidirectional map?"
    for(unsigned int i = 0; i != m; i++)
        if(StartRows[i] == StartRow)
            return i;
    return -1;
}

bool MultiDiagonalSymmetricMatrix::LazySetEntry(float value, unsigned int row, unsigned int column){
    //On the strict upper triangle? Swap, this is ok due to symmetry.
    int i, sr;
    if(column > row)
        i = column,
        column = row,
        row = i;
    if(row >= n) return false;
    sr = row - column;

    //Locate the relevant diagonal.
    i = FindIndex(sr);
    if(i < 0) return false;

    Diagonals[i][column] = value;
    return true;
}
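/* A small sketch of how the storage maps to matrix entries (illustrative only): diagonal index i
   holds the diagonal whose start row is StartRows[i], and the lower-triangle entry (row, column)
   with row - column == StartRows[i] is stored at Diagonals[i][column]. For example, a 4x4
   symmetric tridiagonal matrix:

    MultiDiagonalSymmetricMatrix M(4, 2);
    M.CreateDiagonal(0, 0);                                              //Main diagonal, start row 0.
    M.CreateDiagonal(1, 1);                                              //First subdiagonal, start row 1.
    for(unsigned int j = 0; j < 4; j++) M.LazySetEntry(2.0f, j, j);      //M(j, j) = 2.
    for(unsigned int j = 0; j < 3; j++) M.LazySetEntry(-1.0f, j + 1, j); //M(j+1, j) = M(j, j+1) = -1.
*/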

void MultiDiagonalSymmetricMatrix::VectorProduct(float *Product, float *x){
    //Initialize to zero.
    memset(Product, 0, n*sizeof(float));

    //Loop over the stored diagonals.
    for(unsigned int i = 0; i != m; i++){
        unsigned int sr = StartRows[i];
        float *a = Diagonals[i]; //One fewer dereference.
        unsigned int j, l = DiagonalLength(sr);

        if(sr == 0)
            for(j = 0; j != l; j++)
                Product[j] += a[j]*x[j]; //Separate, fairly simple treatment for the main diagonal.
        else
            for(j = 0; j != l; j++)
                Product[j + sr] += a[j]*x[j], //Contribution from lower...
                Product[j] += a[j]*x[j + sr]; //...and upper triangle.
    }
}
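/* Continuing the 4x4 tridiagonal sketch above: with x = (1, 1, 1, 1), the main diagonal contributes
   2 to every Product[j], and the subdiagonal a = (-1, -1, -1) with sr = 1 contributes -1 to
   Product[j + 1] (lower triangle) and -1 to Product[j] (upper triangle), giving
   Product = (1, 0, 0, 1), which is exactly the dense A x for

        [ 2 -1  0  0]
        [-1  2 -1  0]
        [ 0 -1  2 -1]
        [ 0  0 -1  2].
*/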

bool MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization(unsigned int MaxFillAbove){
    if(m == 1){
        printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: just one diagonal? Can you divide?\n");
        return false;
    }
    if(StartRows[0] != 0){
        printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: main diagonal required to exist for this math.\n");
        return false;
    }

    //How many diagonals in the decomposition?
    MaxFillAbove++; //Conceptually, now "fill" includes an existing diagonal. Simpler in the math that follows.
    unsigned int i, j, mic, fp;
    mic = 1;
    fp = 1;
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10) firstprivate(fp) reduction(+:mic)
#endif
    for(int ii = 1; ii < m; ii++) {
        fp = rtengine::min(StartRows[ii] - StartRows[ii - 1], MaxFillAbove); //Guaranteed positive since StartRows must be created in increasing order.
        mic = mic + fp;
    }
    //Initialize the decomposition - setup memory, start rows, etc.
    MultiDiagonalSymmetricMatrix *ic = new MultiDiagonalSymmetricMatrix(n, mic);
    ic->CreateDiagonal(0, 0); //There's always a main diagonal in this type of decomposition.
    mic = 1;
    for(int ii = 1; ii < m; ii++){
        //Set j to the number of diagonals to be created corresponding to a diagonal on this source matrix...
        j = rtengine::min(StartRows[ii] - StartRows[ii - 1], MaxFillAbove);

        //...and create those diagonals. I want to take a moment to tell you about how much I love minimalistic loops: very much.
        while(j-- != 0)
            if(!ic->CreateDiagonal(mic++, StartRows[ii] - j)){
                //Beware of out of memory, possible for large, sparse problems if you ask for too much fill.
                printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: Out of memory. Ask for less fill?\n");
                delete ic;
                return false;
            }
    }


    //It's all initialized? Uhkay. Do the actual math then.
    int sss, ss, s;
    unsigned int k, MaxStartRow = StartRows[m - 1]; //Handy number.
    float **l = ic->Diagonals;
    float *d = ic->Diagonals[0]; //Describes D in LDLt.

    //Loop over the columns.
    for(j = 0; j != n; j++){
        //Calculate d for this column.
        d[j] = Diagonals[0][j];

        //This is a loop over k from 1 to j, inclusive. We'll cover that by looping over the index of the diagonals (s), and get k from it.
        //The first diagonal is d (k = 0), so skip that and have s start at 1. Cover all available s but stop if k exceeds j.
        for(s = 1; s != ic->m; s++){
            k = ic->StartRows[s];
            if(k > j) break;
            d[j] -= l[s][j - k]*l[s][j - k]*d[j - k];
        }

        if(d[j] == 0.0f){
            printf("Error in MultiDiagonalSymmetricMatrix::CreateIncompleteCholeskyFactorization: division by zero. Matrix not decomposable.\n");
            delete ic;
            return false;
        }
        float id = 1.0f/d[j];

        //Now, calculate l from top down along this column.
        for(s = 1; s != ic->m; s++){
            i = ic->StartRows[s]; //Start row for this entry.
            if(j >= ic->n - i) break; //Possible values of j are limited.

            //Quicker access for an element of l.
            float *lij = &l[s][j];
            sss = FindIndex(i); //Find element in same spot in the source matrix. It might be a zero.
            *lij = sss < 0 ? 0.0f : Diagonals[sss][j];

            //Similar to the loop involving d, convoluted by the fact that two l are involved.
            for(ss = 1; ss != ic->m; ss++){
                k = ic->StartRows[ss];
                if(k > j) break;
                if(i + k > MaxStartRow) break; //Quick exit once k is too big.

                int sss = ic->FindIndex(i + k);
                if(sss < 0) continue; //The requested diagonal is nonexistent. But there may be something later, so don't break.

                /* Let's think about the factors in the term below for a moment.
                j varies from 0 to n - 1, so j - k is bounded inclusive by 0 and j - 1. So d[j - k] is always in the matrix.

                l[sss] and l[ss] are diagonals with corresponding start rows i + k and k.
                For l[sss][j - k] to exist, we must have j - k < n - (i + k) -> j < n - i, which was checked outside this loop and true at this point.
                For l[ ss][j - k] to exist, we must have j - k < n - k -> j < n, which is true straight from definition.

                So, no additional checks, all is good and within bounds at this point.*/
                *lij -= l[sss][j - k]*l[ss][j - k]*d[j - k];
            }

            *lij *= id;
        }
    }

    IncompleteCholeskyFactorization = ic;
    return true;
}
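/* The recurrences implemented above are the usual LDLt ones, just restricted to the diagonals that
   were actually allocated (hence "incomplete"). Writing matrix entries as A(r, c) and L(r, c):

       d(j)    = A(j, j) - sum over k > 0 of L(j, j - k)^2 * d(j - k)
       L(r, j) = (A(r, j) - sum over k > 0 of L(r, j - k)*L(j, j - k)*d(j - k)) / d(j),   r > j

   with both sums running only over offsets k that the stored diagonals can represent. As a tiny
   worked example, tridiag(-1, 2, -1) of size 3 factors exactly (a tridiagonal matrix needs no fill)
   into D = diag(2, 3/2, 4/3) and a unit lower triangular L with subdiagonal (-1/2, -2/3).
*/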

void MultiDiagonalSymmetricMatrix::KillIncompleteCholeskyFactorization(void){
    delete IncompleteCholeskyFactorization;
}

void MultiDiagonalSymmetricMatrix::CholeskyBackSolve(float *x, float *b){
    //We want to solve L D Lt x = b where D is a diagonal matrix described by Diagonals[0] and L is a unit lower triangular matrix described by the rest of the diagonals.
    //Let D Lt x = y. Then, first solve L y = b.
    float *y = new float[n];
    float **d = IncompleteCholeskyFactorization->Diagonals;
    unsigned int *s = IncompleteCholeskyFactorization->StartRows;
    unsigned int M = IncompleteCholeskyFactorization->m, N = IncompleteCholeskyFactorization->n;
    unsigned int i, j;
    for(j = 0; j != N; j++){
        y[j] = b[j];

        for(i = 1; i != M; i++){ //Start at 1 because zero is D.
            int c = (int)j - (int)s[i];
            if(c < 0) break; //Due to ordering of StartRows, no further contributions.
            y[j] -= d[i][c]*y[c];
        }
    }

    //Now, solve x from D Lt x = y -> Lt x = D^-1 y.
    while(j-- != 0){
        x[j] = y[j]/d[0][j];

        for(i = 1; i != M; i++){
            if(j + s[i] >= N) break;
            x[j] -= d[i][j]*x[j + s[i]];
        }
    }

    delete[] y;
}
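/* A quick sanity check of the two substitution passes, reusing the 3x3 tridiag(-1, 2, -1)
   factorization sketched above (D = diag(2, 3/2, 4/3), L subdiagonal (-1/2, -2/3)) with b = (1, 0, 1):

       forward solve  L y = b:          y = (1, 1/2, 4/3)
       back solve     Lt x = D^-1 y:    x = (1, 1, 1)

   and indeed tridiag(-1, 2, -1) * (1, 1, 1) = (1, 0, 1). The factorization is exact for a tridiagonal
   matrix, so the solve is exact too; for a genuinely incomplete factorization it is only approximate,
   which is all the preconditioner in SparseConjugateGradient needs.
*/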


EdgePreservingDecomposition::EdgePreservingDecomposition(unsigned int width, unsigned int height){
    w = width;
    h = height;
    n = w*h;

    //Initialize the matrix just once at construction.
    A = new MultiDiagonalSymmetricMatrix(n, 5);
    if(!(
        A->CreateDiagonal(0, 0) &&
        A->CreateDiagonal(1, 1) &&
        A->CreateDiagonal(2, w - 1) &&
        A->CreateDiagonal(3, w) &&
        A->CreateDiagonal(4, w + 1))){
        delete A;
        A = NULL;
        printf("Error in EdgePreservingDecomposition construction: out of memory.\n");
    }else{
        a0    = A->Diagonals[0];
        a_1   = A->Diagonals[1];
        a_w1  = A->Diagonals[2];
        a_w   = A->Diagonals[3];
        a_w_1 = A->Diagonals[4];
    }
}

EdgePreservingDecomposition::~EdgePreservingDecomposition(){
    delete A;
}
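/* The five stored lower-triangle diagonals correspond, for pixel index i = x + w*y, to the couplings
   of pixel i with itself, with i - 1 (left), i - (w - 1) (upper right), i - w (up) and i - (w + 1)
   (upper left); symmetry supplies the remaining four neighbors of the 3x3 stencil that CreateBlur
   assembles below.
*/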

float *EdgePreservingDecomposition::CreateBlur(float *Source, float Scale, float EdgeStopping, unsigned int Iterates, float *Blur, bool UseBlurForEdgeStop){
    if(Blur == NULL)
        UseBlurForEdgeStop = false, //Use source if there's no supplied Blur.
        Blur = new float[n];
    if(Scale == 0.0f){
        memcpy(Blur, Source, n*sizeof(float));
        return Blur;
    }

    //Create the edge stopping function a, rotationally symmetric and just one instead of (ax, ay). Maybe don't need Blur yet, so use its memory.
    float *a, *g;
    if(UseBlurForEdgeStop) a = new float[n], g = Blur;
    else a = Blur, g = Source;

    //unsigned int x, y;
    unsigned int i;
    unsigned int w1 = w - 1, h1 = h - 1;
    float eps = 0.02f;
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10)
#endif
    for(int y = 0; y < h1; y++){
        float *rg = &g[w*y];
        for(int x = 0; x < w1; x++){
            //Estimate the central difference gradient in the center of a four pixel square. (gx, gy) is actually 2*gradient.
            float gx = (rg[x + 1] - rg[x]) + (rg[x + w + 1] - rg[x + w]);
            float gy = (rg[x + w] - rg[x]) + (rg[x + w + 1] - rg[x + 1]);

            //Apply power to the magnitude of the gradient to get the edge stopping function.
            a[x + w*y] = Scale*powf(0.5f*sqrtf(gx*gx + gy*gy + eps*eps), -EdgeStopping);
        }
    }
    //unsigned int x,y;
    /* Now setup the linear problem. I use the Maxima CAS, here's code for making an FEM formulation for the smoothness term:
        p(x, y) := (1 - x)*(1 - y);
        P(m, n) := A[m][n]*p(x, y) + A[m + 1][n]*p(1 - x, y) + A[m + 1][n + 1]*p(1 - x, 1 - y) + A[m][n + 1]*p(x, 1 - y);
        Integrate(f) := integrate(integrate(f, x, 0, 1), y, 0, 1);

        Integrate(diff(P(u, v), x)*diff(p(x, y), x) + diff(P(u, v), y)*diff(p(x, y), y));
        Integrate(diff(P(u - 1, v), x)*diff(p(1 - x, y), x) + diff(P(u - 1, v), y)*diff(p(1 - x, y), y));
        Integrate(diff(P(u - 1, v - 1), x)*diff(p(1 - x, 1 - y), x) + diff(P(u - 1, v - 1), y)*diff(p(1 - x, 1 - y), y));
        Integrate(diff(P(u, v - 1), x)*diff(p(x, 1 - y), x) + diff(P(u, v - 1), y)*diff(p(x, 1 - y), y));
    So yeah. Use the numeric results of that to fill the matrix A.*/
    memset(a_1, 0, A->DiagonalLength(1)*sizeof(float));
    memset(a_w1, 0, A->DiagonalLength(w - 1)*sizeof(float));
    memset(a_w, 0, A->DiagonalLength(w)*sizeof(float));
    memset(a_w_1, 0, A->DiagonalLength(w + 1)*sizeof(float));
    unsigned int x, y;
    for(i = y = 0; y != h; y++){
        for(x = 0; x != w; x++, i++){
            float ac;
            a0[i] = 1.0;

            //Remember, only fill the lower triangle. Memory for upper is never made. It's symmetric. Trust.
            if(x > 0 && y > 0)
                ac = a[i - w - 1]/6.0f,
                a_w_1[i - w - 1] -= 2.0f*ac, a_w[i - w] -= ac,
                a_1[i - 1] -= ac, a0[i] += 4.0f*ac;

            if(x < w1 && y > 0)
                ac = a[i - w]/6.0f,
                a_w[i - w] -= ac, a_w1[i - w + 1] -= 2.0f*ac,
                a0[i] += 4.0f*ac;

            if(x > 0 && y < h1)
                ac = a[i - 1]/6.0f,
                a_1[i - 1] -= ac, a0[i] += 4.0f*ac;

            if(x < w1 && y < h1)
                a0[i] += 4.0f*a[i]/6.0f;
        }
    }

    if(UseBlurForEdgeStop) delete[] a;

    //Solve & return.
    bool success = A->CreateIncompleteCholeskyFactorization(1); //Fill-in of 1 seems to work really well. More doesn't really help and less hurts (slightly).
    if(!success) {
        fprintf(stderr, "Error: Tonemapping has failed.\n");
        memset(Blur, 0, sizeof(float)*n); //On failure, set the blur to zero. This is subsequently exponentiated in CompressDynamicRange.
        return Blur;
    }
    if(!UseBlurForEdgeStop) memcpy(Blur, Source, n*sizeof(float));
    SparseConjugateGradient(A->PassThroughVectorProduct, Source, n, false, Blur, 0.0f, (void *)A, Iterates, A->PassThroughCholeskyBackSolve);
    A->KillIncompleteCholeskyFactorization();
    return Blur;
}
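/* A minimal calling sketch (buffer contents and parameter values are illustrative only):

    const unsigned int width = 640, height = 480;
    float *L = new float[width*height];   //Row major luminance-like channel; fill from the image.
    for(unsigned int p = 0; p < width*height; p++) L[p] = 1.0f;
    EdgePreservingDecomposition epd(width, height);
    //Scale 2, edge stopping power 1.4, 20 conjugate gradient iterates, no caller-supplied Blur buffer.
    float *base = epd.CreateBlur(L, 2.0f, 1.4f, 20, NULL, false);
    //base is the edge preserving "base layer"; L[p] - base[p] is the corresponding detail layer.
    delete[] base;
    delete[] L;
*/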

float *EdgePreservingDecomposition::CreateIteratedBlur(float *Source, float Scale, float EdgeStopping, unsigned int Iterates, unsigned int Reweightings, float *Blur){
    //Simpler outcome?
    if(Reweightings == 0) return CreateBlur(Source, Scale, EdgeStopping, Iterates, Blur);

    //Create a blur here, initialize.
    if(Blur == NULL) Blur = new float[n];
    memcpy(Blur, Source, n*sizeof(float));

    //Iteratively improve the blur.
    Reweightings++;
    for(unsigned int i = 0; i < Reweightings; i++)
        CreateBlur(Source, Scale, EdgeStopping, Iterates, Blur, true);

    return Blur;
}

float *EdgePreservingDecomposition::CompressDynamicRange(float *Source, float Scale, float EdgeStopping, float CompressionExponent, float DetailBoost, unsigned int Iterates, unsigned int Reweightings, float *Compressed){
    //Small number intended to prevent division by zero. This is different from the eps in CreateBlur.
    const float eps = 0.0001f;

    //We're working with luminance, which behaves better logarithmically.
    unsigned int i;
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10)
#endif
    for(int ii = 0; ii < n; ii++)
        Source[ii] = logf(Source[ii] + eps);

    //Blur. Also setup memory for Compressed (we can just use u since each element of u is used in one calculation).
    float *u = CreateIteratedBlur(Source, Scale, EdgeStopping, Iterates, Reweightings);
    if(Compressed == NULL) Compressed = u;

    //Apply compression, detail boost, unlogging. Compression is done on the logged data and detail boost on unlogged.
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic,10)
#endif
    for(int i = 0; i < n; i++){
        float ce = expf(Source[i] + u[i]*(CompressionExponent - 1.0f)) - eps;
        float ue = expf(u[i]) - eps;
        Source[i] = expf(Source[i]) - eps;
        Compressed[i] = ce + DetailBoost*(Source[i] - ue);
    }

    if(Compressed != u) delete[] u;
    return Compressed;
}
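/* In other words, with S = log(Source + eps) and U the edge preserving blur of S, each output pixel is

       exp(S + U*(CompressionExponent - 1)) - eps  +  DetailBoost*(Source - (exp(U) - eps))

   and since S + U*(CompressionExponent - 1) = (S - U) + U*CompressionExponent, the log-domain detail
   S - U is kept while the base layer is raised to the compression exponent; the second term adds back
   linear-domain detail scaled by DetailBoost. A minimal calling sketch (width, height and L as in the
   CreateBlur sketch; parameter values illustrative only; note that Source is modified in place):

    EdgePreservingDecomposition epd(width, height);
    float *tonemapped = epd.CompressDynamicRange(L, 1.0f, 1.4f, 0.5f, 0.1f, 20, 0, NULL);
    //With Compressed == NULL the result reuses the internally allocated blur buffer; free it when done.
    delete[] tonemapped;
*/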