Tonemapping optimization, Issue 1895

2013-10-09 22:40:37 +02:00
parent 44268074d8
commit e7a3bcabc0
3 changed files with 549 additions and 249 deletions
--- a/rtengine/EdgePreservingDecomposition.h
+++ b/rtengine/EdgePreservingDecomposition.h
@@ -49,15 +49,15 @@ My email address is my screen name followed by @yahoo.com. I'm also known as ben
 #include <cmath>
 #include <cstdio>
 #include <cstring>
-
+#include "opthelper.h"

 //This is for solving big symmetric positive definite linear problems.
-float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), float *b, unsigned int n, bool OkToModify_b = true, float *x = NULL, float RMSResidual = 0.0f, void *Pass = NULL, unsigned int MaximumIterates = 0, void Preconditioner(float *Product, float *x, void *Pass) = NULL);
+float *SparseConjugateGradient(void Ax(float *Product, float *x, void *Pass), float *b, int n, bool OkToModify_b = true, float *x = NULL, float RMSResidual = 0.0f, void *Pass = NULL, int MaximumIterates = 0, void Preconditioner(float *Product, float *x, void *Pass) = NULL);

 //Storage and use class for symmetric matrices, the nonzero contents of which are confined to diagonals.
 class MultiDiagonalSymmetricMatrix{
 public:
-	MultiDiagonalSymmetricMatrix(unsigned int Dimension, unsigned int NumberOfDiagonalsInLowerTriangle);
+	MultiDiagonalSymmetricMatrix(int Dimension, int NumberOfDiagonalsInLowerTriangle);
 	~MultiDiagonalSymmetricMatrix();

 	/* Storage of matrix data, and a function to create memory for Diagonals[index].
@@ -69,22 +69,24 @@ public:
 	public Diagonals manually. Symmetric matrices are represented by this class, and all symmetry is handled internally, you
 	only ever worry or think about the lower triangular (including main diagonal) part of the matrix.
 	*/
-	float **Diagonals;
-	unsigned int *StartRows;
-	bool CreateDiagonal(unsigned int index, unsigned int StartRow);
-	unsigned int n, m;	//The matrix is n x n, with m diagonals on the lower triangle. Don't change these. They should be private but aren't for convenience.
-	inline unsigned int DiagonalLength(unsigned int StartRow){	//Gives number of elements in a diagonal.
+	float **Diagonals;
+	char *buffer;
+	char *DiagBuffer;
+	int *StartRows;
+	bool CreateDiagonal(int index, int StartRow);
+	int n, m;	//The matrix is n x n, with m diagonals on the lower triangle. Don't change these. They should be private but aren't for convenience.
+	inline int DiagonalLength(int StartRow){	//Gives number of elements in a diagonal.
 		return n - StartRow;
 	};

 	//Not efficient, but you can use it if you're lazy, or for early tests. Returns false if the row + column falls on no loaded diagonal, true otherwise.
-	bool LazySetEntry(float value, unsigned int row, unsigned int column);
+	bool LazySetEntry(float value, int row, int column);

 	//Calculates the matrix-vector product of the matrix represented by this class onto the vector x.
 	void VectorProduct(float *Product, float *x);

 	//Given the start row, attempts to find the corresponding index, or -1 if the StartRow doesn't exist.
-	int FindIndex(unsigned int StartRow);
+	inline int FindIndex(int StartRow) __attribute__((always_inline));

 	//This is the same as above, but designed to take this class as a pass-through variable. This way you can feed
 	//the meat of this class into an independent function, such as SparseConjugateGradient.
@@ -96,7 +98,7 @@ public:
 	LDLt factorization of this matrix. Storage is like this: the first diagonal is the diagonal matrix D and the remaining diagonals
 	describe all of L except its main diagonal,	which is a bunch of ones. Read up on the LDLt Cholesky factorization for what all this means.
 	Note that VectorProduct is nonsense. More useful to you is CholeskyBackSolve which fills x, where LDLt x = b. */
-	bool CreateIncompleteCholeskyFactorization(unsigned int MaxFillAbove = 0);
+	bool CreateIncompleteCholeskyFactorization(int MaxFillAbove = 0);
 	void KillIncompleteCholeskyFactorization(void);
 	void CholeskyBackSolve(float *x, float *b);
 	MultiDiagonalSymmetricMatrix *IncompleteCholeskyFactorization;
@@ -109,27 +111,27 @@ public:

 class EdgePreservingDecomposition{
 public:
-	EdgePreservingDecomposition(unsigned int width, unsigned int height);
+	EdgePreservingDecomposition(int width, int height);
 	~EdgePreservingDecomposition();

 	//Create an edge preserving blur of Source. Will create and return, or fill into Blur if not NULL. In place not ok.
 	//If UseBlurForEdgeStop is true, supplied not NULL Blur is used to calculate the edge stopping function instead of Source.
-	float *CreateBlur(float *Source, float Scale, float EdgeStopping, unsigned int Iterates, float *Blur = NULL, bool UseBlurForEdgeStop = false);
+	float *CreateBlur(float *Source, float Scale, float EdgeStopping, int Iterates, float *Blur = NULL, bool UseBlurForEdgeStop = false);

 	//Iterates CreateBlur such that the smoothness term approaches a specific norm via iteratively reweighted least squares. In place not ok.
-	float *CreateIteratedBlur(float *Source, float Scale, float EdgeStopping, unsigned int Iterates, unsigned int Reweightings, float *Blur = NULL);
+	float *CreateIteratedBlur(float *Source, float Scale, float EdgeStopping, int Iterates, int Reweightings, float *Blur = NULL);

 	/*Lowers global contrast while preserving or boosting local contrast. Can fill into Compressed. The smaller Compression
 	the more compression is applied, with Compression = 1 giving no effect and above 1 the opposite effect. You can totally
 	use Compression = 1 and play with DetailBoost for some really sweet unsharp masking. If working on luma/grey, consider giving it a logarithm.
 	In place calculation to save memory (Source == Compressed) is totally ok. Reweightings > 0 invokes CreateIteratedBlur instead of CreateBlur. */
-	float *CompressDynamicRange(float *Source, float Scale = 1.0f, float EdgeStopping = 1.4f, float CompressionExponent = 0.8f, float DetailBoost = 0.1f, unsigned int Iterates = 20, unsigned int Reweightings = 0, float *Compressed = NULL);
+	float *CompressDynamicRange(float *Source, float Scale = 1.0f, float EdgeStopping = 1.4f, float CompressionExponent = 0.8f, float DetailBoost = 0.1f, int Iterates = 20, int Reweightings = 0, float *Compressed = NULL);

 private:
 	MultiDiagonalSymmetricMatrix *A;	//The equations are simple enough to not mandate a matrix class, but fast solution NEEDS a complicated preconditioner.
-	unsigned int w, h, n;
+	int w, h, n;

 	//Convenient access to the data in A.
-	float *a0, *a_1, *a_w, *a_w_1, *a_w1;
+	float * RESTRICT a0, * RESTRICT a_1, * RESTRICT a_w, * RESTRICT a_w_1, * RESTRICT a_w1;
 };