improve mpcg
This improved performance significantly in some small test systems.
This commit is contained in:
parent
b202988d82
commit
079e6eddca
@ -124,8 +124,7 @@ struct b3SpringClothStep
|
|||||||
u32 iterations;
|
u32 iterations;
|
||||||
};
|
};
|
||||||
|
|
||||||
// This class implements a cloth. It treats cloth as a collection
|
// A cloth it treats cloth as a collection of masses connected by springs.
|
||||||
// of masses connected by springs.
|
|
||||||
// Large time steps can be taken.
|
// Large time steps can be taken.
|
||||||
// If accuracy and stability are required, not performance,
|
// If accuracy and stability are required, not performance,
|
||||||
// you can use this class instead of using b3Cloth.
|
// you can use this class instead of using b3Cloth.
|
||||||
@ -177,7 +176,7 @@ public:
|
|||||||
// Return the kinetic (or dynamic) energy in this system.
|
// Return the kinetic (or dynamic) energy in this system.
|
||||||
float32 GetEnergy() const;
|
float32 GetEnergy() const;
|
||||||
|
|
||||||
// Return the tension forces (due to springs) of each point mass.
|
// Return the tension forces (due to springs) acting at each point mass.
|
||||||
// Units are kg * m / s^2
|
// Units are kg * m / s^2
|
||||||
void GetTension(b3Array<b3Vec3>& tensions) const;
|
void GetTension(b3Array<b3Vec3>& tensions) const;
|
||||||
|
|
||||||
@ -222,6 +221,8 @@ protected:
|
|||||||
float32* m_m;
|
float32* m_m;
|
||||||
float32* m_inv_m;
|
float32* m_inv_m;
|
||||||
b3Vec3* m_y;
|
b3Vec3* m_y;
|
||||||
|
b3Vec3* m_z;
|
||||||
|
b3Vec3* m_x0;
|
||||||
b3MassType* m_types;
|
b3MassType* m_types;
|
||||||
u32 m_massCount;
|
u32 m_massCount;
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@ class b3SpringCloth;
|
|||||||
class b3StackAllocator;
|
class b3StackAllocator;
|
||||||
|
|
||||||
struct b3DenseVec3;
|
struct b3DenseVec3;
|
||||||
|
struct b3DiagMat33;
|
||||||
struct b3SparseMat33;
|
struct b3SparseMat33;
|
||||||
|
|
||||||
struct b3MassContact;
|
struct b3MassContact;
|
||||||
@ -58,15 +59,15 @@ private:
|
|||||||
// Compute A and b in Ax = b
|
// Compute A and b in Ax = b
|
||||||
void Compute_A_b(b3SparseMat33& A, b3DenseVec3& b) const;
|
void Compute_A_b(b3SparseMat33& A, b3DenseVec3& b) const;
|
||||||
|
|
||||||
// Compute the initial guess for the iterative solver.
|
// Compute S.
|
||||||
void Compute_x0(b3DenseVec3& x0);
|
void Compute_S(b3DiagMat33& S);
|
||||||
|
|
||||||
// Compute the constraint projection matrix S.
|
// Compute z.
|
||||||
void Compute_S(b3Mat33* S);
|
void Compute_z(b3DenseVec3& z);
|
||||||
|
|
||||||
// Solve Ax = b.
|
// Solve Ax = b.
|
||||||
// Output x and the residual error f = Ax - b ~ 0.
|
// Output x and the residual error f = Ax - b ~ 0.
|
||||||
void Solve(b3DenseVec3& x0, b3DenseVec3& f, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3Mat33* S) const;
|
void Solve(b3DenseVec3& x, b3DenseVec3& f, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const;
|
||||||
|
|
||||||
b3SpringCloth * m_cloth;
|
b3SpringCloth * m_cloth;
|
||||||
float32 m_h;
|
float32 m_h;
|
||||||
@ -82,6 +83,8 @@ private:
|
|||||||
float32* m_m;
|
float32* m_m;
|
||||||
float32* m_inv_m;
|
float32* m_inv_m;
|
||||||
b3Vec3* m_y;
|
b3Vec3* m_y;
|
||||||
|
b3Vec3* m_z;
|
||||||
|
b3Vec3* m_x0;
|
||||||
b3MassType* m_types;
|
b3MassType* m_types;
|
||||||
u32 m_massCount;
|
u32 m_massCount;
|
||||||
|
|
||||||
|
@ -43,6 +43,8 @@ b3SpringCloth::b3SpringCloth()
|
|||||||
m_v = nullptr;
|
m_v = nullptr;
|
||||||
m_f = nullptr;
|
m_f = nullptr;
|
||||||
m_y = nullptr;
|
m_y = nullptr;
|
||||||
|
m_z = nullptr;
|
||||||
|
m_x0 = nullptr;
|
||||||
m_m = nullptr;
|
m_m = nullptr;
|
||||||
m_inv_m = nullptr;
|
m_inv_m = nullptr;
|
||||||
m_types = nullptr;
|
m_types = nullptr;
|
||||||
@ -67,6 +69,8 @@ b3SpringCloth::~b3SpringCloth()
|
|||||||
b3Free(m_inv_m);
|
b3Free(m_inv_m);
|
||||||
b3Free(m_m);
|
b3Free(m_m);
|
||||||
b3Free(m_y);
|
b3Free(m_y);
|
||||||
|
b3Free(m_z);
|
||||||
|
b3Free(m_x0);
|
||||||
b3Free(m_types);
|
b3Free(m_types);
|
||||||
b3Free(m_contacts);
|
b3Free(m_contacts);
|
||||||
b3Free(m_springs);
|
b3Free(m_springs);
|
||||||
@ -212,6 +216,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
m_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
|
m_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
|
||||||
m_inv_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
|
m_inv_m = (float32*)b3Alloc(m_massCount * sizeof(float32));
|
||||||
m_y = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
|
m_y = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
|
||||||
|
m_z = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
|
||||||
|
m_x0 = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3));
|
||||||
m_types = (b3MassType*)b3Alloc(m_massCount * sizeof(b3MassType));
|
m_types = (b3MassType*)b3Alloc(m_massCount * sizeof(b3MassType));
|
||||||
m_contacts = (b3MassContact*)b3Alloc(m_massCount * sizeof(b3MassContact));
|
m_contacts = (b3MassContact*)b3Alloc(m_massCount * sizeof(b3MassContact));
|
||||||
|
|
||||||
@ -230,6 +236,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
m_m[i] = 0.0f;
|
m_m[i] = 0.0f;
|
||||||
m_inv_m[i] = 0.0f;
|
m_inv_m[i] = 0.0f;
|
||||||
m_y[i].SetZero();
|
m_y[i].SetZero();
|
||||||
|
m_z[i].SetZero();
|
||||||
|
m_x0[i].SetZero();
|
||||||
m_types[i] = b3MassType::e_staticMass;
|
m_types[i] = b3MassType::e_staticMass;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -615,7 +623,6 @@ void b3SpringCloth::Step(float32 dt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Integrate
|
// Integrate
|
||||||
|
|
||||||
b3SpringSolverDef solverDef;
|
b3SpringSolverDef solverDef;
|
||||||
solverDef.cloth = this;
|
solverDef.cloth = this;
|
||||||
solverDef.dt = dt;
|
solverDef.dt = dt;
|
||||||
|
@ -19,12 +19,17 @@
|
|||||||
#include <bounce/dynamics/cloth/spring_solver.h>
|
#include <bounce/dynamics/cloth/spring_solver.h>
|
||||||
#include <bounce/dynamics/cloth/spring_cloth.h>
|
#include <bounce/dynamics/cloth/spring_cloth.h>
|
||||||
#include <bounce/dynamics/cloth/dense_vec3.h>
|
#include <bounce/dynamics/cloth/dense_vec3.h>
|
||||||
|
#include <bounce/dynamics/cloth/diag_mat33.h>
|
||||||
#include <bounce/dynamics/cloth/sparse_mat33.h>
|
#include <bounce/dynamics/cloth/sparse_mat33.h>
|
||||||
#include <bounce/dynamics/shapes/shape.h>
|
#include <bounce/dynamics/shapes/shape.h>
|
||||||
#include <bounce/common/memory/stack_allocator.h>
|
#include <bounce/common/memory/stack_allocator.h>
|
||||||
|
|
||||||
// Here, we solve Ax = b using the Modified Conjugate Gradient method.
|
// Here, we solve Ax = b using the Modified Preconditioned Conjugate Gradient (MPCG) algorithm.
|
||||||
// This work is based on the paper "Large Steps in Cloth Simulation - David Baraff, Andrew Witkin".
|
// described in the paper:
|
||||||
|
// "Large Steps in Cloth Simulation - David Baraff, Andrew Witkin".
|
||||||
|
|
||||||
|
// Some improvements for the original MPCG algorithm are described in the paper:
|
||||||
|
// "On the modified conjugate gradient method in cloth simulation - Uri M. Ascher, Eddy Boxerman".
|
||||||
|
|
||||||
b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def)
|
b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def)
|
||||||
{
|
{
|
||||||
@ -42,6 +47,8 @@ b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def)
|
|||||||
m_m = m_cloth->m_m;
|
m_m = m_cloth->m_m;
|
||||||
m_inv_m = m_cloth->m_inv_m;
|
m_inv_m = m_cloth->m_inv_m;
|
||||||
m_y = m_cloth->m_y;
|
m_y = m_cloth->m_y;
|
||||||
|
m_z = m_cloth->m_y;
|
||||||
|
m_x0 = m_cloth->m_x0;
|
||||||
m_types = m_cloth->m_types;
|
m_types = m_cloth->m_types;
|
||||||
m_massCount = m_cloth->m_massCount;
|
m_massCount = m_cloth->m_massCount;
|
||||||
|
|
||||||
@ -58,8 +65,27 @@ b3SpringSolver::~b3SpringSolver()
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static B3_FORCE_INLINE b3SparseMat33 b3AllocSparse(b3StackAllocator* allocator, u32 M, u32 N)
|
||||||
|
{
|
||||||
|
u32 size = M * N;
|
||||||
|
b3Mat33* elements = (b3Mat33*)allocator->Allocate(size * sizeof(b3Mat33));
|
||||||
|
u32* cols = (u32*)allocator->Allocate(size * sizeof(u32));
|
||||||
|
u32* row_ptrs = (u32*)allocator->Allocate((M + 1) * sizeof(u32));
|
||||||
|
|
||||||
|
return b3SparseMat33(M, N, size, elements, row_ptrs, cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
static B3_FORCE_INLINE void b3FreeSparse(b3SparseMat33& matrix, b3StackAllocator* allocator)
|
||||||
|
{
|
||||||
|
allocator->Free(matrix.row_ptrs);
|
||||||
|
allocator->Free(matrix.cols);
|
||||||
|
allocator->Free(matrix.values);
|
||||||
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Solve(b3DenseVec3& f)
|
void b3SpringSolver::Solve(b3DenseVec3& f)
|
||||||
{
|
{
|
||||||
|
B3_PROFILE("Solve");
|
||||||
|
|
||||||
//
|
//
|
||||||
m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
||||||
m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
||||||
@ -73,34 +99,42 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
|
|||||||
// A = M - h * dfdv - h * h * dfdx
|
// A = M - h * dfdv - h * h * dfdx
|
||||||
// b = h * (f0 + h * dfdx * v0 + dfdx * y)
|
// b = h * (f0 + h * dfdx * v0 + dfdx * y)
|
||||||
|
|
||||||
// Allocate matrix memory for the worst case.
|
|
||||||
u32 nzCount = m_massCount * m_massCount;
|
|
||||||
b3Mat33* nzElements = (b3Mat33*)m_allocator->Allocate(nzCount * sizeof(b3Mat33));
|
|
||||||
u32* nzColumns = (u32*)m_allocator->Allocate(nzCount * sizeof(u32));
|
|
||||||
u32* rowPtrs = (u32*)m_allocator->Allocate((m_massCount + 1) * sizeof(u32));
|
|
||||||
|
|
||||||
{
|
{
|
||||||
b3SparseMat33 A(m_massCount, m_massCount, nzCount, nzElements, rowPtrs, nzColumns);
|
// A
|
||||||
|
b3SparseMat33 A = b3AllocSparse(m_allocator, m_massCount, m_massCount);
|
||||||
|
|
||||||
//
|
// b
|
||||||
b3DenseVec3 b(m_massCount);
|
b3DenseVec3 b(m_massCount);
|
||||||
|
|
||||||
// A, b
|
|
||||||
Compute_A_b(A, b);
|
Compute_A_b(A, b);
|
||||||
|
|
||||||
|
// S
|
||||||
|
b3DiagMat33 S(m_massCount);
|
||||||
|
Compute_S(S);
|
||||||
|
|
||||||
|
// z
|
||||||
|
b3DenseVec3 z(m_massCount);
|
||||||
|
Compute_z(z);
|
||||||
|
|
||||||
|
// x0
|
||||||
|
b3DenseVec3 x0(m_massCount);
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
x0[i] = m_x0[i];
|
||||||
|
}
|
||||||
|
|
||||||
// x
|
// x
|
||||||
b3DenseVec3 x(m_massCount);
|
b3DenseVec3 x(m_massCount);
|
||||||
|
|
||||||
// x0
|
|
||||||
Compute_x0(x);
|
|
||||||
|
|
||||||
// S
|
|
||||||
b3Mat33* S = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33));
|
|
||||||
Compute_S(S);
|
|
||||||
|
|
||||||
// Solve Ax = b
|
// Solve Ax = b
|
||||||
Solve(x, f, m_iterations, A, b, S);
|
Solve(x, f, m_iterations, A, b, S, z, x0);
|
||||||
|
|
||||||
|
// Store x0
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
m_x0[i] = x[i];
|
||||||
|
}
|
||||||
|
|
||||||
// Update state
|
// Update state
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
@ -113,13 +147,9 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
|
|||||||
m_x[i] += m_h * m_v[i] + m_y[i];
|
m_x[i] += m_h * m_v[i] + m_y[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(S);
|
b3FreeSparse(A, m_allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(rowPtrs);
|
|
||||||
m_allocator->Free(nzColumns);
|
|
||||||
m_allocator->Free(nzElements);
|
|
||||||
|
|
||||||
m_allocator->Free(m_Jv);
|
m_allocator->Free(m_Jv);
|
||||||
m_allocator->Free(m_Jx);
|
m_allocator->Free(m_Jx);
|
||||||
m_Jv = nullptr;
|
m_Jv = nullptr;
|
||||||
@ -128,6 +158,7 @@ void b3SpringSolver::Solve(b3DenseVec3& f)
|
|||||||
|
|
||||||
#define B3_INDEX(i, j, size) (i + j * size)
|
#define B3_INDEX(i, j, size) (i + j * size)
|
||||||
|
|
||||||
|
//
|
||||||
static void b3SetZero(b3Vec3* out, u32 size)
|
static void b3SetZero(b3Vec3* out, u32 size)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < size; ++i)
|
for (u32 i = 0; i < size; ++i)
|
||||||
@ -136,23 +167,16 @@ static void b3SetZero(b3Vec3* out, u32 size)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
static void b3SetZero(b3Mat33* out, u32 size)
|
static void b3SetZero(b3Mat33* out, u32 size)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < size * size; ++i)
|
for (u32 i = 0; i < size; ++i)
|
||||||
{
|
{
|
||||||
out[i].SetZero();
|
out[i].SetZero();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void b3SetZero_Jacobian(b3Mat33* out, u32 springCount)
|
//
|
||||||
{
|
|
||||||
for (u32 i = 0; i < springCount; ++i)
|
|
||||||
{
|
|
||||||
out[i].SetZero();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// J = dfdx or dvdx
|
|
||||||
static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
||||||
const b3Mat33* J_ii, const b3Spring* springs, u32 springCount)
|
const b3Mat33* J_ii, const b3Spring* springs, u32 springCount)
|
||||||
{
|
{
|
||||||
@ -177,8 +201,8 @@ static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
|||||||
void b3SpringSolver::ApplySpringForces()
|
void b3SpringSolver::ApplySpringForces()
|
||||||
{
|
{
|
||||||
// Zero Jacobians
|
// Zero Jacobians
|
||||||
b3SetZero_Jacobian(m_Jx, m_springCount);
|
b3SetZero(m_Jx, m_springCount);
|
||||||
b3SetZero_Jacobian(m_Jv, m_springCount);
|
b3SetZero(m_Jv, m_springCount);
|
||||||
|
|
||||||
// Compute spring forces and Jacobians
|
// Compute spring forces and Jacobians
|
||||||
for (u32 i = 0; i < m_springCount; ++i)
|
for (u32 i = 0; i < m_springCount; ++i)
|
||||||
@ -249,10 +273,10 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
|
|||||||
{
|
{
|
||||||
// Compute dfdx, dfdv
|
// Compute dfdx, dfdv
|
||||||
b3Mat33* dfdx = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
b3Mat33* dfdx = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
b3SetZero(dfdx, m_massCount);
|
b3SetZero(dfdx, m_massCount * m_massCount);
|
||||||
|
|
||||||
b3Mat33* dfdv = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
b3Mat33* dfdv = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
b3SetZero(dfdv, m_massCount);
|
b3SetZero(dfdv, m_massCount * m_massCount);
|
||||||
|
|
||||||
for (u32 i = 0; i < m_springCount; ++i)
|
for (u32 i = 0; i < m_springCount; ++i)
|
||||||
{
|
{
|
||||||
@ -286,14 +310,14 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
|
|||||||
|
|
||||||
// A = 0
|
// A = 0
|
||||||
b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
b3SetZero(A, m_massCount);
|
b3SetZero(A, m_massCount * m_massCount);
|
||||||
|
|
||||||
// A += M
|
// A += M
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
A[B3_INDEX(i, i, m_massCount)] += b3Diagonal(m_m[i]);
|
A[B3_INDEX(i, i, m_massCount)] += b3Diagonal(m_m[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A += - h * dfdv - h * h * dfdx
|
// A += - h * dfdv - h * h * dfdx
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
@ -359,12 +383,15 @@ void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
|
|||||||
m_allocator->Free(dfdx);
|
m_allocator->Free(dfdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Compute_x0(b3DenseVec3& x0)
|
void b3SpringSolver::Compute_z(b3DenseVec3& z)
|
||||||
{
|
{
|
||||||
x0.SetZero();
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
z[i] = m_z[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Compute_S(b3Mat33* out)
|
void b3SpringSolver::Compute_S(b3DiagMat33& out)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
@ -410,80 +437,58 @@ void b3SpringSolver::Compute_S(b3Mat33* out)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// S * v
|
void b3SpringSolver::Solve(b3DenseVec3& x, b3DenseVec3& f, u32& iterations,
|
||||||
static void b3Filter(b3DenseVec3& out,
|
const b3SparseMat33& A, const b3DenseVec3& b, const b3DiagMat33& S, const b3DenseVec3& z, const b3DenseVec3& y) const
|
||||||
const b3DenseVec3& v, const b3Mat33* S, u32 massCount)
|
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
// P = diag(A)
|
||||||
{
|
b3DiagMat33 inv_P(m_massCount);
|
||||||
out[i] = S[i] * v[i];
|
A.AssembleDiagonal(inv_P);
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void b3SpringSolver::Solve(b3DenseVec3& dv, b3DenseVec3& e, u32& iterations, const b3SparseMat33& A, const b3DenseVec3& b, const b3Mat33* S) const
|
|
||||||
{
|
|
||||||
b3DenseVec3 P(m_massCount);
|
|
||||||
|
|
||||||
b3DenseVec3 inv_P(m_massCount);
|
|
||||||
|
|
||||||
// Compute P, P^-1
|
|
||||||
|
|
||||||
// P = diag(A)^-1
|
|
||||||
|
|
||||||
// diag(A)
|
|
||||||
b3Mat33* diagA = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33));
|
|
||||||
A.AssembleDiagonal(diagA);
|
|
||||||
|
|
||||||
|
// Invert
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
b3Mat33 D = diagA[i];
|
b3Mat33& D = inv_P[i];
|
||||||
|
|
||||||
// Sylvester Criterion to ensure PD-ness
|
// Sylvester Criterion to ensure PD-ness
|
||||||
B3_ASSERT(b3Det(D.x, D.y, D.z) > B3_EPSILON);
|
B3_ASSERT(b3Det(D.x, D.y, D.z) > B3_EPSILON);
|
||||||
|
|
||||||
B3_ASSERT(D[0][0] != 0.0f);
|
B3_ASSERT(D.x.x != 0.0f);
|
||||||
B3_ASSERT(D[1][1] != 0.0f);
|
float32 xx = 1.0f / D.x.x;
|
||||||
B3_ASSERT(D[2][2] != 0.0f);
|
|
||||||
|
|
||||||
P[i] = b3Vec3(1.0f / D[0][0], 1.0f / D[1][1], 1.0f / D[2][2]);
|
B3_ASSERT(D.y.y != 0.0f);
|
||||||
inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]);
|
float32 yy = 1.0f / D.y.y;
|
||||||
|
|
||||||
|
B3_ASSERT(D.z.z != 0.0f);
|
||||||
|
float32 zz = 1.0f / D.z.z;
|
||||||
|
|
||||||
|
D = b3Diagonal(xx, yy, zz);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(diagA);
|
// I
|
||||||
|
b3DiagMat33 I(m_massCount);
|
||||||
|
I.SetIdentity();
|
||||||
|
|
||||||
// delta0 = dot(filter(b), P * filter(b))
|
// x = S * y + (I - S) * z
|
||||||
b3DenseVec3 S_b(m_massCount);
|
x = (S * y) + (I - S) * z;
|
||||||
b3Filter(S_b, b, S, m_massCount);
|
|
||||||
|
|
||||||
// P * filter(b)
|
// b^ = S * (b - A * (I - S) * z)
|
||||||
b3DenseVec3 P_S_b(m_massCount);
|
b3DenseVec3 b_hat = S * (b - A * ((I - S) * z));
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
|
||||||
{
|
|
||||||
P_S_b[i][0] = P[i][0] * S_b[i][0];
|
|
||||||
P_S_b[i][1] = P[i][1] * S_b[i][1];
|
|
||||||
P_S_b[i][2] = P[i][2] * S_b[i][2];
|
|
||||||
}
|
|
||||||
|
|
||||||
float32 delta0 = b3Dot(S_b, P_S_b);
|
// b_delta = dot(b^, P^-1 * b_^)
|
||||||
|
float32 b_delta = b3Dot(b_hat, inv_P * b_hat);
|
||||||
|
|
||||||
// r = filter(b - Adv)
|
// r = S * (b - A * x)
|
||||||
b3DenseVec3 r = b - A * dv;
|
b3DenseVec3 r = S * (b - A * x);
|
||||||
b3Filter(r, r, S, m_massCount);
|
|
||||||
|
|
||||||
// c = filter(P^-1 * r)
|
// p = S * (P^-1 * r)
|
||||||
b3DenseVec3 c(m_massCount);
|
b3DenseVec3 p = S * (inv_P * r);
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
|
||||||
{
|
|
||||||
c[i][0] = inv_P[i][0] * r[i][0];
|
|
||||||
c[i][1] = inv_P[i][1] * r[i][1];
|
|
||||||
c[i][2] = inv_P[i][2] * r[i][2];
|
|
||||||
}
|
|
||||||
b3Filter(c, c, S, m_massCount);
|
|
||||||
|
|
||||||
// deltaNew = dot(r, c)
|
// deltaNew = dot(r, p)
|
||||||
float32 deltaNew = b3Dot(r, c);
|
float32 deltaNew = b3Dot(r, p);
|
||||||
B3_ASSERT(b3IsValid(deltaNew));
|
B3_ASSERT(b3IsValid(deltaNew));
|
||||||
|
|
||||||
|
// Tolerance.
|
||||||
|
// This is the main stopping criteria.
|
||||||
// [0, 1]
|
// [0, 1]
|
||||||
const float32 epsilon = 10.0f * B3_EPSILON;
|
const float32 epsilon = 10.0f * B3_EPSILON;
|
||||||
|
|
||||||
@ -492,50 +497,41 @@ void b3SpringSolver::Solve(b3DenseVec3& dv, b3DenseVec3& e, u32& iterations, con
|
|||||||
|
|
||||||
// Main iteration loop.
|
// Main iteration loop.
|
||||||
u32 iter = 0;
|
u32 iter = 0;
|
||||||
while (iter < maxIters && deltaNew > epsilon * epsilon * delta0)
|
while (iter < maxIters && deltaNew > epsilon * epsilon * b_delta)
|
||||||
{
|
{
|
||||||
// q = filter(A * c)
|
// s = S(A * p)
|
||||||
b3DenseVec3 q = A * c;
|
b3DenseVec3 s = S * (A * p);
|
||||||
b3Filter(q, q, S, m_massCount);
|
|
||||||
|
|
||||||
// alpha = deltaNew / dot(c, q)
|
// alpha = deltaNew / dot(c, q)
|
||||||
float32 alpha = deltaNew / b3Dot(c, q);
|
float32 alpha = deltaNew / b3Dot(p, s);
|
||||||
|
|
||||||
// dv = dv + alpha * c
|
// x = x + alpha * p
|
||||||
dv = dv + alpha * c;
|
x = x + alpha * p;
|
||||||
|
|
||||||
// r = r - alpha * q
|
// r = r - alpha * s
|
||||||
r = r - alpha * q;
|
r = r - alpha * s;
|
||||||
|
|
||||||
// s = inv_P * r
|
// h = inv_P * r
|
||||||
b3DenseVec3 s(m_massCount);
|
b3DenseVec3 h = inv_P * r;
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
|
||||||
{
|
|
||||||
s[i][0] = inv_P[i][0] * r[i][0];
|
|
||||||
s[i][1] = inv_P[i][1] * r[i][1];
|
|
||||||
s[i][2] = inv_P[i][2] * r[i][2];
|
|
||||||
}
|
|
||||||
|
|
||||||
// deltaOld = deltaNew
|
// deltaOld = deltaNew
|
||||||
float32 deltaOld = deltaNew;
|
float32 deltaOld = deltaNew;
|
||||||
|
|
||||||
// deltaNew = dot(r, s)
|
// deltaNew = dot(r, h)
|
||||||
deltaNew = b3Dot(r, s);
|
deltaNew = b3Dot(r, h);
|
||||||
B3_ASSERT(b3IsValid(deltaNew));
|
B3_ASSERT(b3IsValid(deltaNew));
|
||||||
|
|
||||||
// beta = deltaNew / deltaOld
|
// beta = deltaNew / deltaOld
|
||||||
float32 beta = deltaNew / deltaOld;
|
float32 beta = deltaNew / deltaOld;
|
||||||
|
|
||||||
// c = filter(s + beta * c)
|
// p = S * (h + beta * p)
|
||||||
c = s + beta * c;
|
p = S * (h + beta * p);
|
||||||
b3Filter(c, c, S, m_massCount);
|
|
||||||
|
|
||||||
++iter;
|
++iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
iterations = iter;
|
iterations = iter;
|
||||||
|
|
||||||
// Residual error
|
// Residual
|
||||||
// f = A * x - b
|
f = A * x - b;
|
||||||
e = A * dv - b;
|
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user