Add a view for sparse symmetric matrix to exploit cache coherency. This way we can increase maximum iteration count still with good performance.

This commit is contained in:
Irlan 2019-05-04 19:17:40 -03:00
parent b5edb9b1c7
commit b448acfec6
4 changed files with 167 additions and 18 deletions

View File

@ -31,6 +31,7 @@ class b3Force;
struct b3DenseVec3;
struct b3DiagMat33;
struct b3SparseSymMat33;
struct b3SparseSymMat33View;
class b3BodyContact;
class b3ParticleContact;
@ -86,7 +87,7 @@ private:
void ApplyConstraints();
// Solve Ax = b.
void Solve(b3DenseVec3& x, u32& iterations, const b3SparseSymMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;
void Solve(b3DenseVec3& x, u32& iterations, const b3SparseSymMat33View& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;
b3StackAllocator* m_allocator;

View File

@ -119,9 +119,6 @@ struct b3SparseSymMat33
//
void operator-=(const b3SparseSymMat33& m);
//
void Diagonal(b3DiagMat33& out) const;
u32 rowCount;
b3RowValueList* rows;
};
@ -345,16 +342,6 @@ inline void b3SparseSymMat33::operator-=(const b3SparseSymMat33& m)
}
}
inline void b3SparseSymMat33::Diagonal(b3DiagMat33& out) const
{
B3_ASSERT(rowCount == out.n);
for (u32 i = 0; i < rowCount; ++i)
{
out[i] = (*this)(i, i);
}
}
inline void b3Add(b3SparseSymMat33& out, const b3SparseSymMat33& a, const b3SparseSymMat33& b)
{
out = a;

View File

@ -0,0 +1,157 @@
/*
* Copyright (c) 2016-2019 Irlan Robson https://irlanrobson.github.io
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef B3_SPARSE_SYM_MAT_33_VIEW_H
#define B3_SPARSE_SYM_MAT_33_VIEW_H
#include <bounce/cloth/sparse_sym_mat33.h>
struct b3RowValueArray
{
u32 count;
b3RowValue* values;
};
// A read-only sparse symmetric matrix.
struct b3SparseSymMat33View
{
//
b3SparseSymMat33View(const b3SparseSymMat33& _m);
//
~b3SparseSymMat33View();
//
const b3Mat33& operator()(u32 i, u32 j) const;
//
void Diagonal(b3DiagMat33& out) const;
u32 rowCount;
b3RowValueArray* rows;
};
inline b3SparseSymMat33View::b3SparseSymMat33View(const b3SparseSymMat33& _m)
{
rowCount = _m.rowCount;
rows = (b3RowValueArray*)b3Alloc(rowCount * sizeof(b3RowValueArray));
for (u32 i = 0; i < _m.rowCount; ++i)
{
b3RowValueList* rowList = _m.rows + i;
b3RowValueArray* rowArray = rows + i;
rowArray->count = rowList->count;
rowArray->values = (b3RowValue*)b3Alloc(rowArray->count * sizeof(b3RowValue));
u32 valueIndex = 0;
for (b3RowValue* v = rowList->head; v; v = v->next)
{
rowArray->values[valueIndex].column = v->column;
rowArray->values[valueIndex].value = v->value;
++valueIndex;
}
}
}
inline b3SparseSymMat33View::~b3SparseSymMat33View()
{
for (u32 i = 0; i < rowCount; ++i)
{
b3RowValueArray* rowArray = rows + i;
b3Free(rowArray->values);
}
b3Free(rows);
}
inline const b3Mat33& b3SparseSymMat33View::operator()(u32 i, u32 j) const
{
B3_ASSERT(i < rowCount);
B3_ASSERT(j < rowCount);
if (i > j)
{
b3Swap(i, j);
}
b3RowValueArray* vs = rows + i;
for (u32 c = 0; c < vs->count; ++c)
{
b3RowValue* rv = vs->values + c;
u32 column = rv->column;
if (column < j)
{
break;
}
if (column == j)
{
return rv->value;
}
}
return b3Mat33_zero;
}
inline void b3SparseSymMat33View::Diagonal(b3DiagMat33& out) const
{
B3_ASSERT(rowCount == out.n);
for (u32 i = 0; i < rowCount; ++i)
{
out[i] = (*this)(i, i);
}
}
inline void b3Mul(b3DenseVec3& out, const b3SparseSymMat33View& A, const b3DenseVec3& v)
{
B3_ASSERT(A.rowCount == out.n);
out.SetZero();
for (u32 i = 0; i < A.rowCount; ++i)
{
b3RowValueArray* rowArray = A.rows + i;
for (u32 c = 0; c < rowArray->count; ++c)
{
b3RowValue* rv = rowArray->values + c;
u32 j = rv->column;
b3Mat33 a = rv->value;
out[i] += a * v[j];
if (i != j)
{
out[j] += a * v[i];
}
}
}
}
inline b3DenseVec3 operator*(const b3SparseSymMat33View& A, const b3DenseVec3& v)
{
b3DenseVec3 result(v.n);
b3Mul(result, A, v);
return result;
}
#endif

View File

@ -24,6 +24,7 @@
#include <bounce/cloth/dense_vec3.h>
#include <bounce/cloth/diag_mat33.h>
#include <bounce/cloth/sparse_sym_mat33.h>
#include <bounce/cloth/sparse_sym_mat33_view.h>
#include <bounce/common/memory/stack_allocator.h>
#include <bounce/common/math/mat.h>
#include <bounce/dynamics/shapes/shape.h>
@ -277,6 +278,9 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)
// A
b3SparseSymMat33 A = M - h * dfdv - h * h * dfdx;
// View for A
b3SparseSymMat33View viewA(A);
// b
b3DenseVec3 b = h * (sf + h * (dfdx * sv) + dfdx * sy);
@ -284,7 +288,7 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)
// x
b3DenseVec3 x(m_particleCount);
u32 iterations = 0;
Solve(x, iterations, A, b, S, z, sx0);
Solve(x, iterations, viewA, b, S, z, sx0);
b3_clothSolverIterations = iterations;
#if 0
// Copy constraint forces to the contacts
@ -395,7 +399,7 @@ void b3ClothSolver::Solve(float32 dt, const b3Vec3& gravity)
}
void b3ClothSolver::Solve(b3DenseVec3& x, u32& iterations,
const b3SparseSymMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
const b3SparseSymMat33View& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
{
B3_PROFILE("Cloth Solve Ax = b");
@ -446,11 +450,11 @@ void b3ClothSolver::Solve(b3DenseVec3& x, u32& iterations,
float32 delta_new = b3Dot(r, p);
// Set the tolerance.
const float32 tolerance = 10.0f * B3_EPSILON;
const float32 tolerance = 2.0f * B3_EPSILON;
// Maximum number of iterations.
// Stop at this iteration if diverged.
const u32 max_iterations = 20;
const u32 max_iterations = 50;
u32 iteration = 0;