Add a view for sparse symmetric matrix to exploit cache coherency. This way we can increase maximum iteration count still with good performance.

2019-05-04 19:17:40 -03:00 · 2019-05-04 19:17:40 -03:00 · b448acfec6
commit b448acfec6
parent b5edb9b1c7
4 changed files with 167 additions and 18 deletions
--- a/include/bounce/cloth/cloth_solver.h
+++ b/include/bounce/cloth/cloth_solver.h
@ -31,6 +31,7 @@ class b3Force;
 struct b3DenseVec3;
 struct b3DiagMat33;
 struct b3SparseSymMat33;
+struct b3SparseSymMat33View;

 class b3BodyContact;
 class b3ParticleContact;
@ -86,7 +87,7 @@ private:
 	void ApplyConstraints();

 	// Solve Ax = b.
-	void Solve(b3DenseVec3& x, u32& iterations, const b3SparseSymMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;
+	void Solve(b3DenseVec3& x, u32& iterations, const b3SparseSymMat33View& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;

 	b3StackAllocator* m_allocator;

--- a/include/bounce/cloth/sparse_sym_mat33.h
+++ b/include/bounce/cloth/sparse_sym_mat33.h
@ -119,9 +119,6 @@ struct b3SparseSymMat33
 	// 
 	void operator-=(const b3SparseSymMat33& m);

-	// 
-	void Diagonal(b3DiagMat33& out) const;
-
 	u32 rowCount;
 	b3RowValueList* rows; 
 };
@ -345,16 +342,6 @@ inline void b3SparseSymMat33::operator-=(const b3SparseSymMat33& m)
 	}
 }

-inline void b3SparseSymMat33::Diagonal(b3DiagMat33& out) const
-{
-	B3_ASSERT(rowCount == out.n);
-	
-	for (u32 i = 0; i < rowCount; ++i)
-	{
-		out[i] = (*this)(i, i);
-	}
-}
-
 inline void b3Add(b3SparseSymMat33& out, const b3SparseSymMat33& a, const b3SparseSymMat33& b)
 {
 	out = a;
--- a/include/bounce/cloth/sparse_sym_mat33_view.h
+++ b/include/bounce/cloth/sparse_sym_mat33_view.h
@ -0,0 +1,157 @@
+/*
+* Copyright (c) 2016-2019 Irlan Robson https://irlanrobson.github.io
+*
+* This software is provided 'as-is', without any express or implied
+* warranty.  In no event will the authors be held liable for any damages
+* arising from the use of this software.
+* Permission is granted to anyone to use this software for any purpose,
+* including commercial applications, and to alter it and redistribute it
+* freely, subject to the following restrictions:
+* 1. The origin of this software must not be misrepresented; you must not
+* claim that you wrote the original software. If you use this software
+* in a product, an acknowledgment in the product documentation would be
+* appreciated but is not required.
+* 2. Altered source versions must be plainly marked as such, and must not be
+* misrepresented as being the original software.
+* 3. This notice may not be removed or altered from any source distribution.
+*/
+
+#ifndef B3_SPARSE_SYM_MAT_33_VIEW_H
+#define B3_SPARSE_SYM_MAT_33_VIEW_H
+
+#include <bounce/cloth/sparse_sym_mat33.h>
+
+struct b3RowValueArray
+{
+	u32 count;
+	b3RowValue* values;
+};
+
+// A read-only sparse symmetric matrix.
+struct b3SparseSymMat33View
+{
+	//
+	b3SparseSymMat33View(const b3SparseSymMat33& _m);
+
+	//
+	~b3SparseSymMat33View();
+
+	//
+	const b3Mat33& operator()(u32 i, u32 j) const;
+	
+	//
+	void Diagonal(b3DiagMat33& out) const;
+		
+	u32 rowCount;
+	b3RowValueArray* rows;
+};
+
+inline b3SparseSymMat33View::b3SparseSymMat33View(const b3SparseSymMat33& _m)
+{
+	rowCount = _m.rowCount;
+	rows = (b3RowValueArray*)b3Alloc(rowCount * sizeof(b3RowValueArray));
+	for (u32 i = 0; i < _m.rowCount; ++i)
+	{
+		b3RowValueList* rowList = _m.rows + i;
+		b3RowValueArray* rowArray = rows + i;
+
+		rowArray->count = rowList->count;
+		rowArray->values = (b3RowValue*)b3Alloc(rowArray->count * sizeof(b3RowValue));
+
+		u32 valueIndex = 0;
+		for (b3RowValue* v = rowList->head; v; v = v->next)
+		{
+			rowArray->values[valueIndex].column = v->column;
+			rowArray->values[valueIndex].value = v->value;
+			++valueIndex;
+		}
+	}
+}
+
+inline b3SparseSymMat33View::~b3SparseSymMat33View()
+{
+	for (u32 i = 0; i < rowCount; ++i)
+	{
+		b3RowValueArray* rowArray = rows + i;
+		b3Free(rowArray->values);
+	}
+	b3Free(rows);
+}
+
+inline const b3Mat33& b3SparseSymMat33View::operator()(u32 i, u32 j) const
+{
+	B3_ASSERT(i < rowCount);
+	B3_ASSERT(j < rowCount);
+
+	if (i > j)
+	{
+		b3Swap(i, j);
+	}
+
+	b3RowValueArray* vs = rows + i;
+
+	for (u32 c = 0; c < vs->count; ++c)
+	{
+		b3RowValue* rv = vs->values + c;
+
+		u32 column = rv->column;
+
+		if (column < j)
+		{
+			break;
+		}
+
+		if (column == j)
+		{
+			return rv->value;
+		}
+	}
+
+	return b3Mat33_zero;
+}
+
+inline void b3SparseSymMat33View::Diagonal(b3DiagMat33& out) const
+{
+	B3_ASSERT(rowCount == out.n);
+
+	for (u32 i = 0; i < rowCount; ++i)
+	{
+		out[i] = (*this)(i, i);
+	}
+}
+
+inline void b3Mul(b3DenseVec3& out, const b3SparseSymMat33View& A, const b3DenseVec3& v)
+{
+	B3_ASSERT(A.rowCount == out.n);
+
+	out.SetZero();
+
+	for (u32 i = 0; i < A.rowCount; ++i)
+	{
+		b3RowValueArray* rowArray = A.rows + i;
+
+		for (u32 c = 0; c < rowArray->count; ++c)
+		{
+			b3RowValue* rv = rowArray->values + c;
+
+			u32 j = rv->column;
+			b3Mat33 a = rv->value;
+
+			out[i] += a * v[j];
+
+			if (i != j)
+			{
+				out[j] += a * v[i];
+			}
+		}
+	}
+}
+
+inline b3DenseVec3 operator*(const b3SparseSymMat33View& A, const b3DenseVec3& v)
+{
+	b3DenseVec3 result(v.n);
+	b3Mul(result, A, v);
+	return result;
+}
+
+#endif
--- a/src/bounce/cloth/cloth_solver.cpp
+++ b/src/bounce/cloth/cloth_solver.cpp
@ -24,6 +24,7 @@
 #include <bounce/cloth/dense_vec3.h>
 #include <bounce/cloth/diag_mat33.h>
 #include <bounce/cloth/sparse_sym_mat33.h>
+#include <bounce/cloth/sparse_sym_mat33_view.h>
 #include <bounce/common/memory/stack_allocator.h>
 #include <bounce/common/math/mat.h>
 #include <bounce/dynamics/shapes/shape.h>
@ -277,6 +278,9 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)

 		// A
 		b3SparseSymMat33 A = M - h * dfdv - h * h * dfdx;
+		
+		// View for A
+		b3SparseSymMat33View viewA(A);

 		// b
 		b3DenseVec3 b = h * (sf + h * (dfdx * sv) + dfdx * sy);
@ -284,7 +288,7 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)
 		// x
 		b3DenseVec3 x(m_particleCount);
 		u32 iterations = 0;
-		Solve(x, iterations, A, b, S, z, sx0);
+		Solve(x, iterations, viewA, b, S, z, sx0);
 		b3_clothSolverIterations = iterations;
 #if 0
 		// Copy constraint forces to the contacts
@ -395,7 +399,7 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)
 }

 void b3ClothSolver::Solve(b3DenseVec3& x, u32& iterations,
-	const b3SparseSymMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
+	const b3SparseSymMat33View& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
 {
 	B3_PROFILE("Cloth Solve Ax = b");

@ -446,11 +450,11 @@ void b3ClothSolver::Solve(b3DenseVec3& x, u32& iterations,
 	float32 delta_new = b3Dot(r, p);

 	// Set the tolerance.
-	const float32 tolerance = 10.0f * B3_EPSILON;
+	const float32 tolerance = 2.0f * B3_EPSILON;

 	// Maximum number of iterations.
 	// Stop at this iteration if diverged.
-	const u32 max_iterations = 20;
+	const u32 max_iterations = 50;

 	u32 iteration = 0;