improve mpcg

This improved performance significantly in some small test systems.
This commit is contained in:
Irlan 2018-05-19 21:15:29 -03:00
parent b202988d82
commit 079e6eddca
4 changed files with 142 additions and 135 deletions

View File

@ -124,8 +124,7 @@ struct b3SpringClothStep
u32 iterations;
};
// This class implements a cloth. It treats cloth as a collection
// of masses connected by springs.
// A cloth it treats cloth as a collection of masses connected by springs.
// Large time steps can be taken.
// If accuracy and stability are required, not performance,
// you can use this class instead of using b3Cloth.
@ -177,7 +176,7 @@ public:
// Return the kinetic (or dynamic) energy in this system.
float32 GetEnergy() const;
// Return the tension forces (due to springs) of each point mass.
// Return the tension forces (due to springs) acting at each point mass.
// Units are kg * m / s^2
void GetTension(b3Array<b3Vec3>& tensions) const;
@ -222,6 +221,8 @@ protected:
float32* m_m;
float32* m_inv_m;
b3Vec3* m_y;
b3Vec3* m_z;
b3Vec3* m_x0;
b3MassType* m_types;
u32 m_massCount;

View File

@ -28,6 +28,7 @@ class b3SpringCloth;
class b3StackAllocator;
struct b3DenseVec3;
struct b3DiagMat33;
struct b3SparseMat33;
struct b3MassContact;
@ -58,15 +59,15 @@ private:
// Compute A and b in Ax = b
void Compute_A_b(b3SparseMat33& A, b3DenseVec3& b) const;
// Compute the initial guess for the iterative solver.
void Compute_x0(b3DenseVec3& x0);
// Compute S.
void Compute_S(b3DiagMat33& S);
// Compute the constraint projection matrix S.
void Compute_S(b3Mat33* S);
// Compute z.
void Compute_z(b3DenseVec3& z);
// Solve Ax = b.
// Output x and the residual error f = Ax - b ~ 0.
void Solve(b3DenseVec3& x0, b3DenseVec3& f, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3Mat33* S) const;
void Solve(b3DenseVec3& x, b3DenseVec3& f, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;
b3SpringCloth * m_cloth;
float32 m_h;
@ -82,6 +83,8 @@ private:
float32* m_m;
float32* m_inv_m;
b3Vec3* m_y;
b3Vec3* m_z;
b3Vec3* m_x0;
b3MassType* m_types;
u32 m_massCount;

View File

@ -43,6 +43,8 @@ b3SpringCloth::b3SpringCloth()
m_v = nullptr;
m_f = nullptr;
m_y = nullptr;
m_z = nullptr;
m_x0 = nullptr;
m_m = nullptr;
m_inv_m = nullptr;
m_types = nullptr;
@ -67,6 +69,8 @@ b3SpringCloth::~b3SpringCloth()
b3Free(m_inv_m);
b3Free(m_m);
b3Free(m_y);
b3Free(m_z);
b3Free(m_x0);
b3Free(m_types);
b3Free(m_contacts);
b3Free(m_springs);
@ -212,6 +216,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
m_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
m_inv_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
m_y = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
m_z = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
m_x0 = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
m_types = (b3MassType*)b3Alloc(m_massCount * sizeof(b3MassType));
m_contacts = (b3MassContact*)b3Alloc(m_massCount * sizeof(b3MassContact));
@ -230,6 +236,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
m_m[i] = 0.0f;
m_inv_m[i] = 0.0f;
m_y[i].SetZero();
m_z[i].SetZero();
m_x0[i].SetZero();
m_types[i] = b3MassType::e_staticMass;
}
@ -615,7 +623,6 @@ void b3SpringCloth::Step(float32 dt)
}
// Integrate
b3SpringSolverDef solverDef;
solverDef.cloth = this;
solverDef.dt = dt;

View File

@ -19,12 +19,17 @@
#include <bounce/dynamics/cloth/spring_solver.h>
#include <bounce/dynamics/cloth/spring_cloth.h>
#include <bounce/dynamics/cloth/dense_vec3.h>
#include <bounce/dynamics/cloth/diag_mat33.h>
#include <bounce/dynamics/cloth/sparse_mat33.h>
#include <bounce/dynamics/shapes/shape.h>
#include <bounce/common/memory/stack_allocator.h>
// Here, we solve Ax = b using the Modified Conjugate Gradient method.
// This work is based on the paper "Large Steps in Cloth Simulation - David Baraff, Andrew Witkin".
// Here, we solve Ax = b using the Modified Preconditioned Conjugate Gradient (MPCG) algorithm.
// described in the paper:
// "Large Steps in Cloth Simulation - David Baraff, Andrew Witkin".
// Some improvements for the original MPCG algorithm are described in the paper:
// "On the modified conjugate gradient method in cloth simulation - Uri M. Ascher, Eddy Boxerman".
b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def)
{
@ -42,6 +47,8 @@ b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def)
m_m = m_cloth->m_m;
m_inv_m = m_cloth->m_inv_m;
m_y = m_cloth->m_y;
m_z = m_cloth->m_y;
m_x0 = m_cloth->m_x0;
m_types = m_cloth->m_types;
m_massCount = m_cloth->m_massCount;
@ -58,8 +65,27 @@ b3SpringSolver::~b3SpringSolver()
}
static B3_FORCE_INLINE b3SparseMat33 b3AllocSparse(b3StackAllocator* allocator, u32 M, u32 N)
{
u32 size = M * N;
b3Mat33* elements = (b3Mat33*)allocator->Allocate(size * sizeof(b3Mat33));
u32* cols = (u32*)allocator->Allocate(size * sizeof(u32));
u32* row_ptrs = (u32*)allocator->Allocate((M + 1) * sizeof(u32));
return b3SparseMat33(M, N, size, elements, row_ptrs, cols);
}
static B3_FORCE_INLINE void b3FreeSparse(b3SparseMat33& matrix, b3StackAllocator* allocator)
{
allocator->Free(matrix.row_ptrs);
allocator->Free(matrix.cols);
allocator->Free(matrix.values);
}
void b3SpringSolver::Solve(b3DenseVec3& f)
{
B3_PROFILE("Solve");
//
m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
@ -73,33 +99,41 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
// A = M - h * dfdv - h * h * dfdx
// b = h * (f0 + h * dfdx * v0 + dfdx * y)
// Allocate matrix memory for the worst case.
u32 nzCount = m_massCount * m_massCount;
b3Mat33* nzElements = (b3Mat33*)m_allocator->Allocate(nzCount * sizeof(b3Mat33));
u32* nzColumns = (u32*)m_allocator->Allocate(nzCount * sizeof(u32));
u32* rowPtrs = (u32*)m_allocator->Allocate((m_massCount + 1) * sizeof(u32));
{
b3SparseMat33 A(m_massCount, m_massCount, nzCount, nzElements, rowPtrs, nzColumns);
// A
b3SparseMat33 A = b3AllocSparse(m_allocator, m_massCount, m_massCount);
//
// b
b3DenseVec3 b(m_massCount);
// A, b
Compute_A_b(A, b);
// S
b3DiagMat33 S(m_massCount);
Compute_S(S);
// z
b3DenseVec3 z(m_massCount);
Compute_z(z);
// x0
b3DenseVec3 x0(m_massCount);
for (u32 i = 0; i < m_massCount; ++i)
{
x0[i] = m_x0[i];
}
// x
b3DenseVec3 x(m_massCount);
// x0
Compute_x0(x);
// S
b3Mat33* S = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33));
Compute_S(S);
// Solve Ax = b
Solve(x, f, m_iterations, A, b, S);
Solve(x, f, m_iterations, A, b, S, z, x0);
// Store x0
for (u32 i = 0; i < m_massCount; ++i)
{
m_x0[i] = x[i];
}
// Update state
for (u32 i = 0; i < m_massCount; ++i)
@ -113,13 +147,9 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
m_x[i] += m_h * m_v[i] + m_y[i];
}
m_allocator->Free(S);
b3FreeSparse(A, m_allocator);
}
m_allocator->Free(rowPtrs);
m_allocator->Free(nzColumns);
m_allocator->Free(nzElements);
m_allocator->Free(m_Jv);
m_allocator->Free(m_Jx);
m_Jv = nullptr;
@ -128,6 +158,7 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
#define B3_INDEX(i, j, size) (i + j * size)
//
static void b3SetZero(b3Vec3* out, u32 size)
{
for (u32 i = 0; i < size; ++i)
@ -136,23 +167,16 @@ static void b3SetZero(b3Vec3* out, u32 size)
}
}
//
static void b3SetZero(b3Mat33* out, u32 size)
{
for (u32 i = 0; i < size * size; ++i)
for (u32 i = 0; i < size; ++i)
{
out[i].SetZero();
}
}
static void b3SetZero_Jacobian(b3Mat33* out, u32 springCount)
{
for (u32 i = 0; i < springCount; ++i)
{
out[i].SetZero();
}
}
// J = dfdx or dvdx
//
static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
const b3Mat33* J_ii, const b3Spring* springs, u32 springCount)
{
@ -177,8 +201,8 @@ static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
void b3SpringSolver::ApplySpringForces()
{
// Zero Jacobians
b3SetZero_Jacobian(m_Jx, m_springCount);
b3SetZero_Jacobian(m_Jv, m_springCount);
b3SetZero(m_Jx, m_springCount);
b3SetZero(m_Jv, m_springCount);
// Compute spring forces and Jacobians
for (u32 i = 0; i < m_springCount; ++i)
@ -249,10 +273,10 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
{
// Compute dfdx, dfdv
b3Mat33* dfdx = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
b3SetZero(dfdx, m_massCount);
b3SetZero(dfdx, m_massCount * m_massCount);
b3Mat33* dfdv = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
b3SetZero(dfdv, m_massCount);
b3SetZero(dfdv, m_massCount * m_massCount);
for (u32 i = 0; i < m_springCount; ++i)
{
@ -286,7 +310,7 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
// A = 0
b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
b3SetZero(A, m_massCount);
b3SetZero(A, m_massCount * m_massCount);
// A += M
for (u32 i = 0; i < m_massCount; ++i)
@ -359,12 +383,15 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
m_allocator->Free(dfdx);
}
void b3SpringSolver::Compute_x0(b3DenseVec3& x0)
void b3SpringSolver::Compute_z(b3DenseVec3& z)
{
x0.SetZero();
for (u32 i = 0; i < m_massCount; ++i)
{
z[i] = m_z[i];
}
}
void b3SpringSolver::Compute_S(b3Mat33* out)
void b3SpringSolver::Compute_S(b3DiagMat33& out)
{
for (u32 i = 0; i < m_massCount; ++i)
{
@ -410,80 +437,58 @@ void b3SpringSolver::Compute_S(b3Mat33* out)
}
}
// S * v
static void b3Filter(b3DenseVec3& out,
const b3DenseVec3& v, const b3Mat33* S, u32 massCount)
void b3SpringSolver::Solve(b3DenseVec3& x, b3DenseVec3& f, u32& iterations,
const b3SparseMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
{
for (u32 i = 0; i < massCount; ++i)
{
out[i] = S[i] * v[i];
}
}
void b3SpringSolver::Solve(b3DenseVec3& dv, b3DenseVec3& e, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3Mat33* S) const
{
b3DenseVec3 P(m_massCount);
b3DenseVec3 inv_P(m_massCount);
// Compute P, P^-1
// P = diag(A)^-1
// diag(A)
b3Mat33* diagA = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33));
A.AssembleDiagonal(diagA);
// P = diag(A)
b3DiagMat33 inv_P(m_massCount);
A.AssembleDiagonal(inv_P);
// Invert
for (u32 i = 0; i < m_massCount; ++i)
{
b3Mat33 D = diagA[i];
b3Mat33& D = inv_P[i];
// Sylvester Criterion to ensure PD-ness
B3_ASSERT(b3Det(D.x, D.y, D.z) > B3_EPSILON);
B3_ASSERT(D[0][0] != 0.0f);
B3_ASSERT(D[1][1] != 0.0f);
B3_ASSERT(D[2][2] != 0.0f);
B3_ASSERT(D.x.x != 0.0f);
float32 xx = 1.0f / D.x.x;
P[i] = b3Vec3(1.0f / D[0][0], 1.0f / D[1][1], 1.0f / D[2][2]);
inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]);
B3_ASSERT(D.y.y != 0.0f);
float32 yy = 1.0f / D.y.y;
B3_ASSERT(D.z.z != 0.0f);
float32 zz = 1.0f / D.z.z;
D = b3Diagonal(xx, yy, zz);
}
m_allocator->Free(diagA);
// I
b3DiagMat33 I(m_massCount);
I.SetIdentity();
// delta0 = dot(filter(b), P * filter(b))
b3DenseVec3 S_b(m_massCount);
b3Filter(S_b, b, S, m_massCount);
// x = S * y + (I - S) * z
x = (S * y) + (I - S) * z;
// P * filter(b)
b3DenseVec3 P_S_b(m_massCount);
for (u32 i = 0; i < m_massCount; ++i)
{
P_S_b[i][0] = P[i][0] * S_b[i][0];
P_S_b[i][1] = P[i][1] * S_b[i][1];
P_S_b[i][2] = P[i][2] * S_b[i][2];
}
// b^ = S * (b - A * (I - S) * z)
b3DenseVec3 b_hat = S * (b - A * ((I - S) * z));
float32 delta0 = b3Dot(S_b, P_S_b);
// b_delta = dot(b^, P^-1 * b_^)
float32 b_delta = b3Dot(b_hat, inv_P * b_hat);
// r = filter(b - Adv)
b3DenseVec3 r = b - A * dv;
b3Filter(r, r, S, m_massCount);
// r = S * (b - A * x)
b3DenseVec3 r = S * (b - A * x);
// c = filter(P^-1 * r)
b3DenseVec3 c(m_massCount);
for (u32 i = 0; i < m_massCount; ++i)
{
c[i][0] = inv_P[i][0] * r[i][0];
c[i][1] = inv_P[i][1] * r[i][1];
c[i][2] = inv_P[i][2] * r[i][2];
}
b3Filter(c, c, S, m_massCount);
// p = S * (P^-1 * r)
b3DenseVec3 p = S * (inv_P * r);
// deltaNew = dot(r, c)
float32 deltaNew = b3Dot(r, c);
// deltaNew = dot(r, p)
float32 deltaNew = b3Dot(r, p);
B3_ASSERT(b3IsValid(deltaNew));
// Tolerance.
// This is the main stopping criteria.
// [0, 1]
const float32 epsilon = 10.0f * B3_EPSILON;
@ -492,50 +497,41 @@ void b3SpringSolver::Solve(b3DenseVec3& dv, b3DenseVec3& e, u32& iterations, con
// Main iteration loop.
u32 iter = 0;
while (iter < maxIters && deltaNew > epsilon * epsilon * delta0)
while (iter < maxIters && deltaNew > epsilon * epsilon * b_delta)
{
// q = filter(A * c)
b3DenseVec3 q = A * c;
b3Filter(q, q, S, m_massCount);
// s = S(A * p)
b3DenseVec3 s = S * (A * p);
// alpha = deltaNew / dot(c, q)
float32 alpha = deltaNew / b3Dot(c, q);
float32 alpha = deltaNew / b3Dot(p, s);
// dv = dv + alpha * c
dv = dv + alpha * c;
// x = x + alpha * p
x = x + alpha * p;
// r = r - alpha * q
r = r - alpha * q;
// r = r - alpha * s
r = r - alpha * s;
// s = inv_P * r
b3DenseVec3 s(m_massCount);
for (u32 i = 0; i < m_massCount; ++i)
{
s[i][0] = inv_P[i][0] * r[i][0];
s[i][1] = inv_P[i][1] * r[i][1];
s[i][2] = inv_P[i][2] * r[i][2];
}
// h = inv_P * r
b3DenseVec3 h = inv_P * r;
// deltaOld = deltaNew
float32 deltaOld = deltaNew;
// deltaNew = dot(r, s)
deltaNew = b3Dot(r, s);
// deltaNew = dot(r, h)
deltaNew = b3Dot(r, h);
B3_ASSERT(b3IsValid(deltaNew));
// beta = deltaNew / deltaOld
float32 beta = deltaNew / deltaOld;
// c = filter(s + beta * c)
c = s + beta * c;
b3Filter(c, c, S, m_massCount);
// p = S * (h + beta * p)
p = S * (h + beta * p);
++iter;
}
iterations = iter;
// Residual error
// f = A * x - b
e = A * dv - b;
// Residual
f = A * x - b;
}