improve CG performance using CSR matrix format
This commit is contained in:
@ -107,10 +107,10 @@ enum b3MassType
|
|||||||
//
|
//
|
||||||
struct b3MassContact
|
struct b3MassContact
|
||||||
{
|
{
|
||||||
|
u32 j;
|
||||||
b3Vec3 n, t1, t2;
|
b3Vec3 n, t1, t2;
|
||||||
float32 Fn, Ft1, Ft2;
|
float32 Fn, Ft1, Ft2;
|
||||||
u32 j;
|
bool lockN, lockT1, lockT2;
|
||||||
bool lockOnSurface, slideOnSurface;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Time step statistics
|
// Time step statistics
|
||||||
@ -186,7 +186,7 @@ protected:
|
|||||||
// Update contacts.
|
// Update contacts.
|
||||||
// This is where some contacts might be initiated or terminated.
|
// This is where some contacts might be initiated or terminated.
|
||||||
void UpdateContacts();
|
void UpdateContacts();
|
||||||
|
|
||||||
b3StackAllocator* m_allocator;
|
b3StackAllocator* m_allocator;
|
||||||
|
|
||||||
b3Mesh* m_mesh;
|
b3Mesh* m_mesh;
|
||||||
@ -252,7 +252,7 @@ inline void b3SpringCloth::SetType(u32 i, b3MassType type)
|
|||||||
m_v[i].SetZero();
|
m_v[i].SetZero();
|
||||||
m_y[i].SetZero();
|
m_y[i].SetZero();
|
||||||
|
|
||||||
m_contacts[i].lockOnSurface = false;
|
m_contacts[i].lockN = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +25,9 @@
|
|||||||
class b3SpringCloth;
|
class b3SpringCloth;
|
||||||
class b3StackAllocator;
|
class b3StackAllocator;
|
||||||
|
|
||||||
|
struct b3DenseVec3;
|
||||||
|
struct b3SparseMat33;
|
||||||
|
|
||||||
struct b3MassContact;
|
struct b3MassContact;
|
||||||
struct b3Spring;
|
struct b3Spring;
|
||||||
|
|
||||||
@ -43,25 +46,25 @@ public:
|
|||||||
|
|
||||||
~b3SpringSolver();
|
~b3SpringSolver();
|
||||||
|
|
||||||
void Solve(b3Vec3* constraintForces);
|
void Solve(b3DenseVec3& extraForces);
|
||||||
|
|
||||||
u32 GetIterations() const;
|
u32 GetIterations() const;
|
||||||
private:
|
private:
|
||||||
// Apply internal forces and store their unique derivatives.
|
// Apply internal forces and store their unique derivatives.
|
||||||
void InitializeSpringForces();
|
void ApplySpringForces();
|
||||||
|
|
||||||
// Initialize b, from Ax = b
|
// Compute A and b in Ax = b
|
||||||
void Compute_b(b3Vec3* b) const;
|
void Compute_A_b(b3SparseMat33& A, b3DenseVec3& b) const;
|
||||||
|
|
||||||
// Solve Ax = b using the Modified Conjugate Gradient (MCG).
|
// Solve Ax = b using the Modified Conjugate Gradient (MCG).
|
||||||
// Output x and the residual error f.
|
// Output x and the residual error f.
|
||||||
void Solve_MCG(b3Vec3* x, b3Vec3* f, u32& iterations, const b3Vec3* b) const;
|
void Solve_MCG(b3DenseVec3& x, const b3SparseMat33& A, b3DenseVec3& f, u32& iterations, const b3DenseVec3& b) const;
|
||||||
|
|
||||||
// Solve Ax = b using MCG with Jacobi preconditioning.
|
// Solve Ax = b using MCG with Jacobi preconditioning.
|
||||||
// Output x and the residual error f.
|
// Output x and the residual error f.
|
||||||
// This method is slower than MCG because we have to compute the preconditioning
|
// This method is slower than MCG because we have to compute the preconditioning
|
||||||
// matrix P, but it can improve convergence.
|
// matrix P, but it can improve convergence.
|
||||||
void Solve_MPCG(b3Vec3* x, b3Vec3* f, u32& iterations, const b3Vec3* b) const;
|
void Solve_MPCG(b3DenseVec3& x, const b3SparseMat33& A, b3DenseVec3& f, u32& iterations, const b3DenseVec3& b) const;
|
||||||
|
|
||||||
b3SpringCloth * m_cloth;
|
b3SpringCloth * m_cloth;
|
||||||
float32 m_h;
|
float32 m_h;
|
||||||
|
@ -18,12 +18,18 @@
|
|||||||
|
|
||||||
#include <bounce/dynamics/cloth/spring_cloth.h>
|
#include <bounce/dynamics/cloth/spring_cloth.h>
|
||||||
#include <bounce/dynamics/cloth/spring_solver.h>
|
#include <bounce/dynamics/cloth/spring_solver.h>
|
||||||
|
#include <bounce/dynamics/cloth/dense_vec3.h>
|
||||||
|
#include <bounce/dynamics/cloth/sparse_mat33.h>
|
||||||
#include <bounce/dynamics/shapes/shape.h>
|
#include <bounce/dynamics/shapes/shape.h>
|
||||||
#include <bounce/collision/shapes/mesh.h>
|
#include <bounce/collision/shapes/mesh.h>
|
||||||
#include <bounce/common/memory/stack_allocator.h>
|
#include <bounce/common/memory/stack_allocator.h>
|
||||||
|
|
||||||
#define B3_FORCE_THRESHOLD (0.1f)
|
#define B3_FORCE_THRESHOLD (0.1f)
|
||||||
|
|
||||||
|
#define B3_CLOTH_BENDING 0
|
||||||
|
|
||||||
|
#define B3_CLOTH_FRICTION 0
|
||||||
|
|
||||||
b3SpringCloth::b3SpringCloth()
|
b3SpringCloth::b3SpringCloth()
|
||||||
{
|
{
|
||||||
m_allocator = nullptr;
|
m_allocator = nullptr;
|
||||||
@ -95,7 +101,7 @@ static void b3FindEdges(b3UniqueEdge* uniqueEdges, u32& uniqueCount, b3SharedEdg
|
|||||||
{
|
{
|
||||||
u32 t1v1 = i1s[j1];
|
u32 t1v1 = i1s[j1];
|
||||||
|
|
||||||
u32 t1v2 = i1s[ b3NextIndex(j1) ];
|
u32 t1v2 = i1s[b3NextIndex(j1)];
|
||||||
|
|
||||||
u32 foundCount = 0;
|
u32 foundCount = 0;
|
||||||
|
|
||||||
@ -107,7 +113,7 @@ static void b3FindEdges(b3UniqueEdge* uniqueEdges, u32& uniqueCount, b3SharedEdg
|
|||||||
for (u32 j2 = 0; j2 < 3; ++j2)
|
for (u32 j2 = 0; j2 < 3; ++j2)
|
||||||
{
|
{
|
||||||
u32 t2v1 = i2s[j2];
|
u32 t2v1 = i2s[j2];
|
||||||
u32 t2v2 = i2s[ b3NextIndex(j2) ];
|
u32 t2v2 = i2s[b3NextIndex(j2)];
|
||||||
|
|
||||||
if (t1v1 == t2v2 && t1v2 == t2v1)
|
if (t1v1 == t2v2 && t1v2 == t2v1)
|
||||||
{
|
{
|
||||||
@ -154,10 +160,10 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
B3_ASSERT(def.density > 0.0f);
|
B3_ASSERT(def.density > 0.0f);
|
||||||
|
|
||||||
m_allocator = def.allocator;
|
m_allocator = def.allocator;
|
||||||
|
|
||||||
m_mesh = def.mesh;
|
m_mesh = def.mesh;
|
||||||
m_r = def.r;
|
m_r = def.r;
|
||||||
|
|
||||||
m_gravity = def.gravity;
|
m_gravity = def.gravity;
|
||||||
|
|
||||||
const b3Mesh* m = m_mesh;
|
const b3Mesh* m = m_mesh;
|
||||||
@ -177,8 +183,9 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
m_contacts[i].Fn = 0.0f;
|
m_contacts[i].Fn = 0.0f;
|
||||||
m_contacts[i].Ft1 = 0.0f;
|
m_contacts[i].Ft1 = 0.0f;
|
||||||
m_contacts[i].Ft2 = 0.0f;
|
m_contacts[i].Ft2 = 0.0f;
|
||||||
m_contacts[i].lockOnSurface = false;
|
m_contacts[i].lockN = false;
|
||||||
m_contacts[i].slideOnSurface = false;
|
m_contacts[i].lockT1 = false;
|
||||||
|
m_contacts[i].lockT2 = false;
|
||||||
|
|
||||||
m_x[i] = m->vertices[i];
|
m_x[i] = m->vertices[i];
|
||||||
m_v[i].SetZero();
|
m_v[i].SetZero();
|
||||||
@ -201,7 +208,7 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
float32 area = b3Area(p1, p2, p3);
|
float32 area = b3Area(p1, p2, p3);
|
||||||
|
|
||||||
B3_ASSERT(area > B3_EPSILON);
|
B3_ASSERT(area > B3_EPSILON);
|
||||||
|
|
||||||
float32 mass = def.density * area;
|
float32 mass = def.density * area;
|
||||||
|
|
||||||
const float32 inv3 = 1.0f / 3.0f;
|
const float32 inv3 = 1.0f / 3.0f;
|
||||||
@ -251,15 +258,15 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
++m_springCount;
|
++m_springCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if B3_CLOTH_BENDING == 1
|
||||||
// Bending
|
// Bending
|
||||||
/*
|
|
||||||
for (u32 i = 0; i < sharedCount; ++i)
|
for (u32 i = 0; i < sharedCount; ++i)
|
||||||
{
|
{
|
||||||
b3SharedEdge* e = sharedEdges + i;
|
b3SharedEdge* e = sharedEdges + i;
|
||||||
|
|
||||||
b3Vec3 p1 = m->vertices[e->nsv1];
|
b3Vec3 p1 = m->vertices[e->nsv1];
|
||||||
b3Vec3 p2 = m->vertices[e->nsv2];
|
b3Vec3 p2 = m->vertices[e->nsv2];
|
||||||
|
|
||||||
b3Spring* S = m_springs + m_springCount;
|
b3Spring* S = m_springs + m_springCount;
|
||||||
S->type = e_bendSpring;
|
S->type = e_bendSpring;
|
||||||
S->i1 = e->nsv1;
|
S->i1 = e->nsv1;
|
||||||
@ -269,7 +276,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def)
|
|||||||
S->kd = def.kd;
|
S->kd = def.kd;
|
||||||
++m_springCount;
|
++m_springCount;
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
#endif // #if B3_CLOTH_BENDING
|
||||||
|
|
||||||
m_allocator->Free(uniqueEdges);
|
m_allocator->Free(uniqueEdges);
|
||||||
m_allocator->Free(sharedEdges);
|
m_allocator->Free(sharedEdges);
|
||||||
@ -316,8 +324,7 @@ void b3SpringCloth::UpdateContacts()
|
|||||||
|
|
||||||
b3MassContact* c = m_contacts + i;
|
b3MassContact* c = m_contacts + i;
|
||||||
|
|
||||||
bool wasLocked = c->lockOnSurface;
|
bool wasLockedN = c->lockN;
|
||||||
bool wasSliding = c->slideOnSurface;
|
|
||||||
|
|
||||||
b3Sphere s1;
|
b3Sphere s1;
|
||||||
s1.vertex = m_x[i];
|
s1.vertex = m_x[i];
|
||||||
@ -354,8 +361,9 @@ void b3SpringCloth::UpdateContacts()
|
|||||||
c->Fn = 0.0f;
|
c->Fn = 0.0f;
|
||||||
c->Ft1 = 0.0f;
|
c->Ft1 = 0.0f;
|
||||||
c->Ft2 = 0.0f;
|
c->Ft2 = 0.0f;
|
||||||
c->lockOnSurface = false;
|
c->lockN = false;
|
||||||
c->slideOnSurface = false;
|
c->lockT1 = false;
|
||||||
|
c->lockT2 = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,40 +377,99 @@ void b3SpringCloth::UpdateContacts()
|
|||||||
m_y[i] -= s * n;
|
m_y[i] -= s * n;
|
||||||
|
|
||||||
// Update contact state
|
// Update contact state
|
||||||
if (wasLocked)
|
if (wasLockedN)
|
||||||
{
|
{
|
||||||
// Was the contact force attractive?
|
// Was the contact force attractive?
|
||||||
if (c->Fn < B3_FORCE_THRESHOLD)
|
if (c->Fn < B3_FORCE_THRESHOLD)
|
||||||
{
|
{
|
||||||
// Terminate the contact.
|
// Terminate the contact.
|
||||||
c->lockOnSurface = false;
|
c->Fn = 0.0f;
|
||||||
|
c->Ft1 = 0.0f;
|
||||||
|
c->Ft2 = 0.0f;
|
||||||
|
c->lockN = false;
|
||||||
|
c->lockT1 = false;
|
||||||
|
c->lockT2 = false;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since the contact force was repulsive
|
// Since the contact force was repulsive
|
||||||
// maintain the acceleration constraint.
|
// maintain the normal acceleration constraint.
|
||||||
c->n = n;
|
|
||||||
c->j = bestIndex;
|
c->j = bestIndex;
|
||||||
c->lockOnSurface = true;
|
c->n = n;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// The contact has began.
|
// The contact has began.
|
||||||
|
c->j = bestIndex;
|
||||||
c->n = n;
|
c->n = n;
|
||||||
c->Fn = 0.0f;
|
c->Fn = 0.0f;
|
||||||
c->Ft1 = 0.0f;
|
c->Ft1 = 0.0f;
|
||||||
c->Ft2 = 0.0f;
|
c->Ft2 = 0.0f;
|
||||||
c->j = bestIndex;
|
c->lockN = true;
|
||||||
c->lockOnSurface = true;
|
|
||||||
|
|
||||||
// Relative velocity
|
|
||||||
b3Vec3 dv = m_v[i];
|
|
||||||
|
|
||||||
b3MakeTangents(c->t1, c->t2, dv, n);
|
|
||||||
c->slideOnSurface = false;
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if B3_CLOTH_FRICTION == 1
|
||||||
|
|
||||||
|
// Apply friction impulses
|
||||||
|
|
||||||
|
// Note without a friction force, the tangential acceleration won't be
|
||||||
|
// removed.
|
||||||
|
|
||||||
|
// Relative velocity
|
||||||
|
b3Vec3 dv = m_v[i];
|
||||||
|
|
||||||
|
b3MakeTangents(c->t1, c->t2, dv, n);
|
||||||
|
|
||||||
|
// Coefficients of friction for the solid
|
||||||
|
const float32 uk = shape->GetFriction();
|
||||||
|
const float32 us = 2.0f * uk;
|
||||||
|
|
||||||
|
float32 dvn = b3Dot(dv, n);
|
||||||
|
float32 normalImpulse = -m_inv_m[i] * dvn;
|
||||||
|
|
||||||
|
b3Vec3 ts[2];
|
||||||
|
ts[0] = c->t1;
|
||||||
|
ts[1] = c->t2;
|
||||||
|
|
||||||
|
bool lockT[2];
|
||||||
|
|
||||||
|
for (u32 k = 0; k < 2; ++k)
|
||||||
|
{
|
||||||
|
b3Vec3 t = ts[k];
|
||||||
|
|
||||||
|
float32 dvt = b3Dot(dv, t);
|
||||||
|
float32 tangentImpulse = -m_inv_m[i] * dvt;
|
||||||
|
|
||||||
|
float32 maxStaticImpulse = us * normalImpulse;
|
||||||
|
if (tangentImpulse * tangentImpulse > maxStaticImpulse * maxStaticImpulse)
|
||||||
|
{
|
||||||
|
lockT[k] = false;
|
||||||
|
|
||||||
|
// Dynamic friction
|
||||||
|
float32 maxDynamicImpulse = uk * normalImpulse;
|
||||||
|
if (tangentImpulse * tangentImpulse > maxDynamicImpulse * maxDynamicImpulse)
|
||||||
|
{
|
||||||
|
b3Vec3 P = tangentImpulse * t;
|
||||||
|
|
||||||
|
m_v[i] += m_m[i] * P;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lockT[k] = true;
|
||||||
|
|
||||||
|
// Static friction
|
||||||
|
b3Vec3 P = tangentImpulse * t;
|
||||||
|
|
||||||
|
m_v[i] += m_m[i] * P;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
c->lockT1 = lockT[0];
|
||||||
|
c->lockT2 = lockT[1];
|
||||||
|
|
||||||
|
#endif // #if B3_CLOTH_FRICTION
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -426,6 +493,7 @@ void b3SpringCloth::Step(float32 dt)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Integrate
|
// Integrate
|
||||||
|
|
||||||
b3SpringSolverDef solverDef;
|
b3SpringSolverDef solverDef;
|
||||||
solverDef.cloth = this;
|
solverDef.cloth = this;
|
||||||
solverDef.dt = dt;
|
solverDef.dt = dt;
|
||||||
@ -434,30 +502,28 @@ void b3SpringCloth::Step(float32 dt)
|
|||||||
|
|
||||||
// Extra constraint forces that should have been applied to satisfy the constraints
|
// Extra constraint forces that should have been applied to satisfy the constraints
|
||||||
// todo Find the applied constraint forces.
|
// todo Find the applied constraint forces.
|
||||||
b3Vec3* forces = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 forces(m_massCount);
|
||||||
|
|
||||||
solver.Solve(forces);
|
solver.Solve(forces);
|
||||||
|
|
||||||
m_step.iterations = solver.GetIterations();
|
m_step.iterations = solver.GetIterations();
|
||||||
|
|
||||||
// Store constraint forces for physics logic
|
// Store constraint forces for physics logic
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
b3Vec3 force = forces[i];
|
b3Vec3 force = forces[i];
|
||||||
|
|
||||||
b3MassContact* contact = m_contacts + i;
|
b3MassContact* c = m_contacts + i;
|
||||||
|
|
||||||
// Signed normal force magnitude
|
// Signed normal force magnitude
|
||||||
contact->Fn = b3Dot(force, contact->n);
|
c->Fn = b3Dot(force, c->n);
|
||||||
|
|
||||||
// Signed tangent forces magnitude
|
// Signed tangent forces magnitude
|
||||||
contact->Ft1 = b3Dot(force, contact->t1);
|
c->Ft1 = b3Dot(force, c->t1);
|
||||||
contact->Ft2 = b3Dot(force, contact->t2);
|
c->Ft2 = b3Dot(force, c->t2);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(forces);
|
// Clear position alteration
|
||||||
|
|
||||||
// Clear position correction
|
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
m_y[i].SetZero();
|
m_y[i].SetZero();
|
||||||
@ -484,7 +550,7 @@ void b3SpringCloth::Draw(b3Draw* draw) const
|
|||||||
|
|
||||||
for (u32 i = 0; i < m->vertexCount; ++i)
|
for (u32 i = 0; i < m->vertexCount; ++i)
|
||||||
{
|
{
|
||||||
if (m_contacts[i].lockOnSurface)
|
if (m_contacts[i].lockN)
|
||||||
{
|
{
|
||||||
if (m_contacts[i].Fn < B3_FORCE_THRESHOLD)
|
if (m_contacts[i].Fn < B3_FORCE_THRESHOLD)
|
||||||
{
|
{
|
||||||
|
@ -18,6 +18,8 @@
|
|||||||
|
|
||||||
#include <bounce/dynamics/cloth/spring_solver.h>
|
#include <bounce/dynamics/cloth/spring_solver.h>
|
||||||
#include <bounce/dynamics/cloth/spring_cloth.h>
|
#include <bounce/dynamics/cloth/spring_cloth.h>
|
||||||
|
#include <bounce/dynamics/cloth/dense_vec3.h>
|
||||||
|
#include <bounce/dynamics/cloth/sparse_mat33.h>
|
||||||
#include <bounce/common/memory/stack_allocator.h>
|
#include <bounce/common/memory/stack_allocator.h>
|
||||||
|
|
||||||
// Here, we solve Ax = b using the Modified Conjugate Gradient method.
|
// Here, we solve Ax = b using the Modified Conjugate Gradient method.
|
||||||
@ -58,48 +60,61 @@ b3SpringSolver::~b3SpringSolver()
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Solve(b3Vec3* f)
|
void b3SpringSolver::Solve(b3DenseVec3& f)
|
||||||
{
|
{
|
||||||
|
//
|
||||||
m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
||||||
m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33));
|
||||||
|
|
||||||
// Compute and apply spring forces, store their unique derivatives.
|
// Apply spring forces. Also, store their unique derivatives.
|
||||||
InitializeSpringForces();
|
ApplySpringForces();
|
||||||
|
|
||||||
// Integrate
|
// Integrate
|
||||||
|
|
||||||
// Solve Ax = b, where
|
// Solve Ax = b, where
|
||||||
// A = M - h * dfdv - h * h * dfdx
|
// A = M - h * dfdv - h * h * dfdx
|
||||||
// b = h * (f0 + h * dfdx * v0 + dfdx * y)
|
// b = h * (f0 + h * dfdx * v0 + dfdx * y)
|
||||||
|
|
||||||
//
|
|
||||||
b3Vec3* b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
|
||||||
Compute_b(b);
|
|
||||||
|
|
||||||
//
|
|
||||||
b3Vec3* x = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
|
||||||
|
|
||||||
// Solve Ax = b
|
// Allocate matrix memory for the worst case.
|
||||||
if (b3_enablePrecontitioning)
|
u32 nzCount = m_massCount * m_massCount;
|
||||||
|
b3Mat33* nzElements = (b3Mat33*)m_allocator->Allocate(nzCount * sizeof(b3Mat33));
|
||||||
|
u32* nzColumns = (u32*)m_allocator->Allocate(nzCount * sizeof(u32));
|
||||||
|
u32* rowPtrs = (u32*)m_allocator->Allocate((m_massCount + 1) * sizeof(u32));
|
||||||
|
|
||||||
{
|
{
|
||||||
Solve_MPCG(x, f, m_iterations, b);
|
b3SparseMat33 A(m_massCount, m_massCount, nzCount, nzElements, rowPtrs, nzColumns);
|
||||||
}
|
|
||||||
else
|
//
|
||||||
{
|
b3DenseVec3 b(m_massCount);
|
||||||
Solve_MCG(x, f, m_iterations, b);
|
|
||||||
|
//
|
||||||
|
Compute_A_b(A, b);
|
||||||
|
|
||||||
|
// x
|
||||||
|
b3DenseVec3 x(m_massCount);
|
||||||
|
|
||||||
|
if (b3_enablePrecontitioning)
|
||||||
|
{
|
||||||
|
Solve_MPCG(x, A, f, m_iterations, b);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Solve_MCG(x, A, f, m_iterations, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update state
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
m_v[i] += x[i];
|
||||||
|
|
||||||
|
// dx = h * (v0 + dv) + y = h * v1 + y
|
||||||
|
m_x[i] += m_h * m_v[i] + m_y[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update state
|
m_allocator->Free(rowPtrs);
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
m_allocator->Free(nzColumns);
|
||||||
{
|
m_allocator->Free(nzElements);
|
||||||
m_v[i] += x[i];
|
|
||||||
|
|
||||||
// dx = h * (v0 + dv) + y = h * v1 + y
|
|
||||||
m_x[i] += m_h * m_v[i] + m_y[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
m_allocator->Free(x);
|
|
||||||
m_allocator->Free(b);
|
|
||||||
|
|
||||||
m_allocator->Free(m_Jv);
|
m_allocator->Free(m_Jv);
|
||||||
m_allocator->Free(m_Jx);
|
m_allocator->Free(m_Jx);
|
||||||
@ -107,6 +122,8 @@ void b3SpringSolver::Solve(b3Vec3* f)
|
|||||||
m_Jx = nullptr;
|
m_Jx = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define B3_INDEX(i, j, size) (i + j * size)
|
||||||
|
|
||||||
static void b3SetZero(b3Vec3* out, u32 size)
|
static void b3SetZero(b3Vec3* out, u32 size)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < size; ++i)
|
for (u32 i = 0; i < size; ++i)
|
||||||
@ -115,63 +132,11 @@ static void b3SetZero(b3Vec3* out, u32 size)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void b3Copy(b3Vec3* out, const b3Vec3* v, u32 size)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
out[i] = v[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void b3Add(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
out[i] = a[i] + b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void b3Sub(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
out[i] = a[i] - b[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static float32 b3Dot(const b3Vec3* a, const b3Vec3* b, u32 size)
|
|
||||||
{
|
|
||||||
float32 result = 0.0f;
|
|
||||||
for (u32 i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
result += b3Dot(a[i], b[i]);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define B3_INDEX(i, j, size) (i + j * size)
|
|
||||||
|
|
||||||
static void b3SetZero(b3Mat33* out, u32 size)
|
static void b3SetZero(b3Mat33* out, u32 size)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < size; ++i)
|
for (u32 i = 0; i < size * size; ++i)
|
||||||
{
|
|
||||||
for (u32 j = 0; j < size; ++j)
|
|
||||||
{
|
|
||||||
out[B3_INDEX(i, j, size)].SetZero();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void b3Mul(b3Vec3* out, const b3Mat33* M, const b3Vec3* v, u32 size)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < size; ++i)
|
|
||||||
{
|
{
|
||||||
out[i].SetZero();
|
out[i].SetZero();
|
||||||
|
|
||||||
for (u32 j = 0; j < size; ++j)
|
|
||||||
{
|
|
||||||
out[i] += M[B3_INDEX(i, j, size)] * v[j];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,7 +149,7 @@ static void b3SetZero_Jacobian(b3Mat33* out, u32 springCount)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// J = dfdx or dvdx
|
// J = dfdx or dvdx
|
||||||
static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
||||||
const b3Mat33* J_ii, const b3Spring* springs, u32 springCount)
|
const b3Mat33* J_ii, const b3Spring* springs, u32 springCount)
|
||||||
{
|
{
|
||||||
b3SetZero(out, massCount);
|
b3SetZero(out, massCount);
|
||||||
@ -205,117 +170,7 @@ static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// A = M - h * dfdv - h * h * dfdx
|
void b3SpringSolver::ApplySpringForces()
|
||||||
// A * v = (M - h * dfdv - h * h * dfdx) * v = M * v + (-h * dfdv * v) + (-h * h * dfdx * v)
|
|
||||||
static void b3Mul_A(b3Vec3* out, const b3Vec3* v, u32 massCount,
|
|
||||||
b3StackAllocator* allocator,
|
|
||||||
const float32* m, float32 h, const b3Mat33* Jx, const b3Mat33* Jv, const b3Spring* springs, u32 springCount)
|
|
||||||
{
|
|
||||||
// v1 = M * v
|
|
||||||
b3Vec3* v1 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3));
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
v1[i] = m[i] * v[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
// v2 = (-h * dfdv * v)
|
|
||||||
b3Vec3* v2 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3));
|
|
||||||
b3Mul_Jacobian(v2, v, massCount, Jv, springs, springCount);
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
v2[i] *= -h;
|
|
||||||
}
|
|
||||||
|
|
||||||
// v3 = (-h * h * dfdx * v)
|
|
||||||
b3Vec3* v3 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3));
|
|
||||||
b3Mul_Jacobian(v3, v, massCount, Jx, springs, springCount);
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
v3[i] *= -h * h;
|
|
||||||
}
|
|
||||||
|
|
||||||
// v = v1 + v2 + v3
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
out[i] = v1[i] + v2[i] + v3[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
allocator->Free(v3);
|
|
||||||
allocator->Free(v2);
|
|
||||||
allocator->Free(v1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This outputs the desired acceleration of the masses in the constrained
|
|
||||||
// directions.
|
|
||||||
static void b3Compute_z(b3Vec3* out,
|
|
||||||
u32 massCount, const b3MassType* types, const b3MassContact* contacts)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
switch (types[i])
|
|
||||||
{
|
|
||||||
case e_staticMass:
|
|
||||||
{
|
|
||||||
out[i].SetZero();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case e_dynamicMass:
|
|
||||||
{
|
|
||||||
if (contacts[i].lockOnSurface)
|
|
||||||
{
|
|
||||||
out[i].SetZero();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
out[i].SetZero();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
B3_ASSERT(false);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void b3Filter(b3Vec3* out,
|
|
||||||
const b3Vec3* v, u32 massCount, const b3MassType* types, const b3MassContact* contacts)
|
|
||||||
{
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
switch (types[i])
|
|
||||||
{
|
|
||||||
case e_staticMass:
|
|
||||||
{
|
|
||||||
out[i].SetZero();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case e_dynamicMass:
|
|
||||||
{
|
|
||||||
if (contacts[i].lockOnSurface)
|
|
||||||
{
|
|
||||||
// Ensure the prohibited direction points to the solid.
|
|
||||||
b3Vec3 n = contacts[i].n;
|
|
||||||
b3Mat33 S = b3Mat33_identity - b3Outer(n, n);
|
|
||||||
|
|
||||||
out[i] = S * v[i];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
out[i] = v[i];
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
B3_ASSERT(false);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void b3SpringSolver::InitializeSpringForces()
|
|
||||||
{
|
{
|
||||||
// Zero Jacobians
|
// Zero Jacobians
|
||||||
b3SetZero_Jacobian(m_Jx, m_springCount);
|
b3SetZero_Jacobian(m_Jx, m_springCount);
|
||||||
@ -325,7 +180,7 @@ void b3SpringSolver::InitializeSpringForces()
|
|||||||
for (u32 i = 0; i < m_springCount; ++i)
|
for (u32 i = 0; i < m_springCount; ++i)
|
||||||
{
|
{
|
||||||
b3Spring* S = m_springs + i;
|
b3Spring* S = m_springs + i;
|
||||||
|
|
||||||
b3SpringType type = S->type;
|
b3SpringType type = S->type;
|
||||||
u32 i1 = S->i1;
|
u32 i1 = S->i1;
|
||||||
u32 i2 = S->i2;
|
u32 i2 = S->i2;
|
||||||
@ -353,11 +208,11 @@ void b3SpringSolver::InitializeSpringForces()
|
|||||||
|
|
||||||
// C * n = 1 - L0 / L * dx
|
// C * n = 1 - L0 / L * dx
|
||||||
const b3Mat33 I = b3Mat33_identity;
|
const b3Mat33 I = b3Mat33_identity;
|
||||||
|
|
||||||
float32 L3 = L * L * L;
|
float32 L3 = L * L * L;
|
||||||
|
|
||||||
b3Mat33 Jx11 = -ks * ( (1.0f - L0 / L) * I + (L0 / L3) * b3Outer(dx, dx) );
|
b3Mat33 Jx11 = -ks * ((1.0f - L0 / L) * I + (L0 / L3) * b3Outer(dx, dx));
|
||||||
|
|
||||||
m_Jx[i] = Jx11;
|
m_Jx[i] = Jx11;
|
||||||
|
|
||||||
// Compute damping forces
|
// Compute damping forces
|
||||||
@ -370,13 +225,125 @@ void b3SpringSolver::InitializeSpringForces()
|
|||||||
m_f[i2] += df2;
|
m_f[i2] += df2;
|
||||||
|
|
||||||
b3Mat33 Jv11 = -kd * I;
|
b3Mat33 Jv11 = -kd * I;
|
||||||
|
|
||||||
m_Jv[i] = Jv11;
|
m_Jv[i] = Jv11;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Compute_b(b3Vec3* b) const
|
static B3_FORCE_INLINE bool b3IsZero(const b3Mat33& A)
|
||||||
{
|
{
|
||||||
|
bool isZeroX = b3Dot(A.x, A.x) <= B3_EPSILON * B3_EPSILON;
|
||||||
|
bool isZeroY = b3Dot(A.y, A.y) <= B3_EPSILON * B3_EPSILON;
|
||||||
|
bool isZeroZ = b3Dot(A.z, A.z) <= B3_EPSILON * B3_EPSILON;
|
||||||
|
|
||||||
|
return isZeroX * isZeroY * isZeroZ;
|
||||||
|
}
|
||||||
|
|
||||||
|
void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const
|
||||||
|
{
|
||||||
|
// Compute dfdx, dfdv
|
||||||
|
b3Mat33* dfdx = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
|
b3SetZero(dfdx, m_massCount);
|
||||||
|
|
||||||
|
b3Mat33* dfdv = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
|
b3SetZero(dfdv, m_massCount);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < m_springCount; ++i)
|
||||||
|
{
|
||||||
|
const b3Spring* S = m_springs + i;
|
||||||
|
u32 i1 = S->i1;
|
||||||
|
u32 i2 = S->i2;
|
||||||
|
|
||||||
|
b3Mat33 Jx11 = m_Jx[i];
|
||||||
|
b3Mat33 Jx12 = -Jx11;
|
||||||
|
b3Mat33 Jx21 = Jx12;
|
||||||
|
b3Mat33 Jx22 = Jx11;
|
||||||
|
|
||||||
|
dfdx[B3_INDEX(i1, i1, m_massCount)] += Jx11;
|
||||||
|
dfdx[B3_INDEX(i1, i2, m_massCount)] += Jx12;
|
||||||
|
dfdx[B3_INDEX(i2, i1, m_massCount)] += Jx21;
|
||||||
|
dfdx[B3_INDEX(i2, i2, m_massCount)] += Jx22;
|
||||||
|
|
||||||
|
b3Mat33 Jv11 = m_Jv[i];
|
||||||
|
b3Mat33 Jv12 = -Jv11;
|
||||||
|
b3Mat33 Jv21 = Jv12;
|
||||||
|
b3Mat33 Jv22 = Jv11;
|
||||||
|
|
||||||
|
dfdv[B3_INDEX(i1, i1, m_massCount)] += Jv11;
|
||||||
|
dfdv[B3_INDEX(i1, i2, m_massCount)] += Jv12;
|
||||||
|
dfdv[B3_INDEX(i2, i1, m_massCount)] += Jv21;
|
||||||
|
dfdv[B3_INDEX(i2, i2, m_massCount)] += Jv22;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute A
|
||||||
|
// A = M - h * dfdv - h * h * dfdx
|
||||||
|
|
||||||
|
// A = 0
|
||||||
|
b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
||||||
|
b3SetZero(A, m_massCount);
|
||||||
|
|
||||||
|
// A += M
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
A[B3_INDEX(i, i, m_massCount)] += b3Diagonal(m_m[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// A += - h * dfdv - h * h * dfdx
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
for (u32 j = 0; j < m_massCount; ++j)
|
||||||
|
{
|
||||||
|
A[B3_INDEX(i, j, m_massCount)] += (-m_h * dfdv[B3_INDEX(i, j, m_massCount)]) + (-m_h * m_h * dfdx[B3_INDEX(i, j, m_massCount)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assembly sparsity
|
||||||
|
u32 nzCount = 0;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
for (u32 i = 0; i < m_massCount * m_massCount; ++i)
|
||||||
|
{
|
||||||
|
b3Mat33 a = A[i];
|
||||||
|
if (b3IsZero(a) == false)
|
||||||
|
{
|
||||||
|
++nzCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SA.row_ptrs[0] = 0;
|
||||||
|
|
||||||
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
|
{
|
||||||
|
u32 rowNzCount = 0;
|
||||||
|
|
||||||
|
for (u32 j = 0; j < m_massCount; ++j)
|
||||||
|
{
|
||||||
|
b3Mat33 a = A[B3_INDEX(i, j, m_massCount)];
|
||||||
|
|
||||||
|
if (b3IsZero(a) == false)
|
||||||
|
{
|
||||||
|
B3_ASSERT(nzCount <= SA.valueCount);
|
||||||
|
|
||||||
|
SA.values[nzCount] = a;
|
||||||
|
SA.cols[nzCount] = j;
|
||||||
|
|
||||||
|
++nzCount;
|
||||||
|
++rowNzCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
SA.row_ptrs[i + 1] = SA.row_ptrs[(i + 1) - 1] + rowNzCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
B3_ASSERT(nzCount <= SA.valueCount);
|
||||||
|
SA.valueCount = nzCount;
|
||||||
|
|
||||||
|
m_allocator->Free(A);
|
||||||
|
|
||||||
|
// Compute b
|
||||||
|
// b = h * (f0 + h * Jx_v + Jx_y )
|
||||||
|
|
||||||
// Jx_v = dfdx * v
|
// Jx_v = dfdx * v
|
||||||
b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
||||||
b3Mul_Jacobian(Jx_v, m_v, m_massCount, m_Jx, m_springs, m_springCount);
|
b3Mul_Jacobian(Jx_v, m_v, m_massCount, m_Jx, m_springs, m_springCount);
|
||||||
@ -385,7 +352,6 @@ void b3SpringSolver::Compute_b(b3Vec3* b) const
|
|||||||
b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
||||||
b3Mul_Jacobian(Jx_y, m_y, m_massCount, m_Jx, m_springs, m_springCount);
|
b3Mul_Jacobian(Jx_y, m_y, m_massCount, m_Jx, m_springs, m_springCount);
|
||||||
|
|
||||||
// b = h * (f0 + h * Jx_v + Jx_y )
|
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
b[i] = m_h * (m_f[i] + m_h * Jx_v[i] + Jx_y[i]);
|
b[i] = m_h * (m_f[i] + m_h * Jx_v[i] + Jx_y[i]);
|
||||||
@ -393,184 +359,198 @@ void b3SpringSolver::Compute_b(b3Vec3* b) const
|
|||||||
|
|
||||||
m_allocator->Free(Jx_y);
|
m_allocator->Free(Jx_y);
|
||||||
m_allocator->Free(Jx_v);
|
m_allocator->Free(Jx_v);
|
||||||
|
|
||||||
|
m_allocator->Free(dfdv);
|
||||||
|
m_allocator->Free(dfdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Solve_MCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3Vec3* b) const
|
// This outputs the desired acceleration of the masses in the constrained
|
||||||
|
// directions.
|
||||||
|
static void b3Compute_z(b3DenseVec3& out,
|
||||||
|
u32 massCount, const b3MassType* types, const b3MassContact* contacts)
|
||||||
|
{
|
||||||
|
out.SetZero();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maintains invariants inside the MCG solver.
|
||||||
|
static void b3Filter(b3DenseVec3& out,
|
||||||
|
const b3DenseVec3& v, u32 massCount, const b3MassType* types, const b3MassContact* contacts)
|
||||||
|
{
|
||||||
|
for (u32 i = 0; i < massCount; ++i)
|
||||||
|
{
|
||||||
|
switch (types[i])
|
||||||
|
{
|
||||||
|
case e_staticMass:
|
||||||
|
{
|
||||||
|
out[i].SetZero();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case e_dynamicMass:
|
||||||
|
{
|
||||||
|
if (contacts[i].lockN == true)
|
||||||
|
{
|
||||||
|
b3Vec3 n = contacts[i].n;
|
||||||
|
|
||||||
|
b3Mat33 S = b3Mat33_identity - b3Outer(n, n);
|
||||||
|
|
||||||
|
if (contacts[i].lockT1 == true)
|
||||||
|
{
|
||||||
|
b3Vec3 t1 = contacts[i].t1;
|
||||||
|
|
||||||
|
S -= b3Outer(t1, t1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (contacts[i].lockT2 == true)
|
||||||
|
{
|
||||||
|
b3Vec3 t2 = contacts[i].t2;
|
||||||
|
|
||||||
|
S -= b3Outer(t2, t2);
|
||||||
|
}
|
||||||
|
|
||||||
|
out[i] = S * v[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
out[i] = v[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
B3_ASSERT(false);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void b3SpringSolver::Solve_MCG(b3DenseVec3& dv, const b3SparseMat33& A, b3DenseVec3& e, u32& iterations, const b3DenseVec3& b) const
|
||||||
{
|
{
|
||||||
// dv = z
|
// dv = z
|
||||||
b3Compute_z(dv, m_massCount, m_types, m_contacts);
|
b3Compute_z(dv, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// r = filter(b - Adv)
|
|
||||||
b3Vec3* r = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
|
||||||
|
|
||||||
// Adv = A * dv
|
// Adv = A * dv
|
||||||
b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 Adv(m_massCount);
|
||||||
|
A.Mul(Adv, dv);
|
||||||
|
|
||||||
b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
// r = filter(b - Adv)
|
||||||
b3Sub(r, b, Adv, m_massCount);
|
b3DenseVec3 r(m_massCount);
|
||||||
|
b3Sub(r, b, Adv);
|
||||||
b3Filter(r, r, m_massCount, m_types, m_contacts);
|
b3Filter(r, r, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
m_allocator->Free(Adv);
|
|
||||||
|
|
||||||
// c = r
|
// c = r
|
||||||
b3Vec3* c = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 c = r;
|
||||||
b3Copy(c, r, m_massCount);
|
|
||||||
|
|
||||||
// eps0 = dot(f, f)
|
// eps0 = dot(r, r)
|
||||||
float32 eps0 = b3Dot(r, r, m_massCount);
|
float32 eps0 = b3Dot(r, r);
|
||||||
|
|
||||||
// epsNew = dot(r, r)
|
// epsNew = dot(r, r)
|
||||||
float32 epsNew = eps0;
|
float32 epsNew = eps0;
|
||||||
|
|
||||||
// [0, 1]
|
// [0, 1]
|
||||||
const float32 kTol = 0.25f;
|
const float32 kTol = 10.0f * B3_EPSILON;
|
||||||
|
|
||||||
// Limit number of iterations to prevent cycling.
|
// Limit number of iterations to prevent cycling.
|
||||||
const u32 kMaxIters = 100;
|
const u32 kMaxIters = 10 * 10;
|
||||||
|
|
||||||
// Main iteration loop.
|
// Main iteration loop.
|
||||||
u32 iter = 0;
|
u32 iter = 0;
|
||||||
while (iter < kMaxIters && epsNew > kTol * kTol * eps0)
|
while (iter < kMaxIters && epsNew > kTol * kTol * eps0)
|
||||||
{
|
{
|
||||||
// q = filter(A * c)
|
// q = filter(A * c)
|
||||||
b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 q(m_massCount);
|
||||||
b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
A.Mul(q, c);
|
||||||
b3Filter(q, q, m_massCount, m_types, m_contacts);
|
b3Filter(q, q, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// alpha = epsNew / dot(c, q)
|
// alpha = epsNew / dot(c, q)
|
||||||
float32 alpha_den = b3Dot(c, q, m_massCount);
|
float32 alpha_den = b3Dot(c, q);
|
||||||
float32 alpha = epsNew / alpha_den;
|
float32 alpha = epsNew / alpha_den;
|
||||||
|
|
||||||
// dv = dv + alpha * c
|
// dv = dv + alpha * c
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
b3DenseVec3 alpha_c(m_massCount);
|
||||||
{
|
b3Mul(alpha_c, alpha, c);
|
||||||
dv[i] = dv[i] + alpha * c[i];
|
b3Add(dv, dv, alpha_c);
|
||||||
}
|
|
||||||
|
|
||||||
// r = r - alpha * q
|
// r = r - alpha * q
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
b3DenseVec3 alpha_q(m_massCount);
|
||||||
{
|
b3Mul(alpha_q, alpha, q);
|
||||||
r[i] = r[i] - alpha * q[i];
|
b3Sub(r, r, alpha_q);
|
||||||
}
|
|
||||||
|
|
||||||
m_allocator->Free(q);
|
|
||||||
|
|
||||||
// epsOld = epsNew
|
// epsOld = epsNew
|
||||||
float32 epsOld = epsNew;
|
float32 epsOld = epsNew;
|
||||||
|
|
||||||
// epsNew = dot(r, r)
|
// epsNew = dot(r, r)
|
||||||
epsNew = b3Dot(r, r, m_massCount);
|
epsNew = b3Dot(r, r);
|
||||||
|
|
||||||
float32 beta = epsNew / epsOld;
|
float32 beta = epsNew / epsOld;
|
||||||
|
|
||||||
// c = filter(r + beta * c)
|
// c = filter(r + beta * c)
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
b3DenseVec3 beta_c(m_massCount);
|
||||||
{
|
b3Mul(beta_c, beta, c);
|
||||||
c[i] = r[i] + beta * c[i];
|
b3Add(c, r, beta_c);
|
||||||
}
|
|
||||||
b3Filter(c, c, m_massCount, m_types, m_contacts);
|
b3Filter(c, c, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
++iter;
|
++iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(c);
|
|
||||||
m_allocator->Free(r);
|
|
||||||
|
|
||||||
iterations = iter;
|
iterations = iter;
|
||||||
|
|
||||||
// f = A * dv - b
|
// f = A * dv - b
|
||||||
b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
A.Mul(e, dv);
|
||||||
b3Sub(e, e, b, m_massCount);
|
b3Sub(e, e, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void b3Make_A(b3Mat33* A,
|
// Sylvester's Criterion
|
||||||
const b3Mat33* Jx, const b3Mat33* Jv, u32 massCount,
|
static bool b3IsPD(const b3Mat33* diagA, u32 n)
|
||||||
b3StackAllocator* allocator, float32 h, float32* m,
|
|
||||||
const b3Spring* springs, u32 springCount)
|
|
||||||
{
|
{
|
||||||
// A = M - h * dfdv - h * h * dfdx
|
// Loop over the principal elements
|
||||||
|
for (u32 i = 0; i < n; ++i)
|
||||||
// A = 0
|
|
||||||
b3SetZero(A, massCount);
|
|
||||||
|
|
||||||
// Compute dfdx, dfdv
|
|
||||||
b3Mat33* dfdx = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33));
|
|
||||||
b3SetZero(dfdx, massCount);
|
|
||||||
|
|
||||||
b3Mat33* dfdv = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33));
|
|
||||||
b3SetZero(dfdv, massCount);
|
|
||||||
|
|
||||||
for (u32 i = 0; i < springCount; ++i)
|
|
||||||
{
|
{
|
||||||
const b3Spring* S = springs + i;
|
b3Mat33 a = diagA[i];
|
||||||
u32 i1 = S->i1;
|
|
||||||
u32 i2 = S->i2;
|
float32 D = b3Det(a.x, a.y, a.z);
|
||||||
|
|
||||||
b3Mat33 Jx11 = Jx[i];
|
const float32 kTol = 0.0f;
|
||||||
b3Mat33 Jx12 = -Jx11;
|
|
||||||
b3Mat33 Jx21 = Jx12;
|
|
||||||
b3Mat33 Jx22 = Jx11;
|
|
||||||
|
|
||||||
dfdx[B3_INDEX(i1, i1, massCount)] += Jx11;
|
if (D <= kTol)
|
||||||
dfdx[B3_INDEX(i1, i2, massCount)] += Jx12;
|
|
||||||
dfdx[B3_INDEX(i2, i1, massCount)] += Jx21;
|
|
||||||
dfdx[B3_INDEX(i2, i2, massCount)] += Jx22;
|
|
||||||
|
|
||||||
b3Mat33 Jv11 = Jv[i];
|
|
||||||
b3Mat33 Jv12 = -Jv11;
|
|
||||||
b3Mat33 Jv21 = Jv12;
|
|
||||||
b3Mat33 Jv22 = Jv11;
|
|
||||||
|
|
||||||
dfdv[B3_INDEX(i1, i1, massCount)] += Jv11;
|
|
||||||
dfdv[B3_INDEX(i1, i2, massCount)] += Jv12;
|
|
||||||
dfdv[B3_INDEX(i2, i1, massCount)] += Jv21;
|
|
||||||
dfdv[B3_INDEX(i2, i2, massCount)] += Jv22;
|
|
||||||
}
|
|
||||||
|
|
||||||
// A += M
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
A[B3_INDEX(i, i, massCount)] += b3Diagonal(m[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// A += - h * dfdv - h * h * dfdx
|
|
||||||
for (u32 i = 0; i < massCount; ++i)
|
|
||||||
{
|
|
||||||
for (u32 j = 0; j < massCount; ++j)
|
|
||||||
{
|
{
|
||||||
A[B3_INDEX(i, j, massCount)] += (-h * dfdv[B3_INDEX(i, j, massCount)]) + (-h * h * dfdx[B3_INDEX(i, j, massCount)]);
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
allocator->Free(dfdv);
|
return true;
|
||||||
allocator->Free(dfdx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3Vec3* b) const
|
void b3SpringSolver::Solve_MPCG(b3DenseVec3& dv, const b3SparseMat33& A, b3DenseVec3& e, u32& iterations, const b3DenseVec3& b) const
|
||||||
{
|
{
|
||||||
b3Vec3* r = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 r(m_massCount);
|
||||||
|
|
||||||
|
b3DenseVec3 c(m_massCount);
|
||||||
|
|
||||||
|
b3DenseVec3 s(m_massCount);
|
||||||
|
|
||||||
b3Vec3* c = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 inv_P(m_massCount);
|
||||||
|
|
||||||
b3Vec3* s = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
|
||||||
|
|
||||||
b3Vec3* inv_P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
|
||||||
|
|
||||||
// dv = z
|
// dv = z
|
||||||
b3Compute_z(dv, m_massCount, m_types, m_contacts);
|
b3Compute_z(dv, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// P = diag(A)^-1
|
// P = diag(A)^-1
|
||||||
b3Vec3* P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 P(m_massCount);
|
||||||
|
|
||||||
// A = M - h * dfdv - h * h * dfdx
|
// diag(A)
|
||||||
b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33));
|
b3Mat33* diagA = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33));
|
||||||
b3Make_A(A, m_Jx, m_Jv, m_massCount, m_allocator, m_h, m_m, m_springs, m_springCount);
|
A.AssembleDiagonal(diagA);
|
||||||
|
|
||||||
// Compute P, P^-1
|
// Compute P, P^-1
|
||||||
// @todo Optimize so we don't need to compute A.
|
bool isPD = true;
|
||||||
|
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
b3Mat33 D = A[B3_INDEX(i, i, m_massCount)];
|
b3Mat33 D = diagA[i];
|
||||||
|
|
||||||
|
if (b3Det(D.x, D.y, D.z) <= 3.0f * B3_EPSILON)
|
||||||
|
{
|
||||||
|
isPD = false;
|
||||||
|
}
|
||||||
|
|
||||||
B3_ASSERT(D[0][0] != 0.0f);
|
B3_ASSERT(D[0][0] != 0.0f);
|
||||||
B3_ASSERT(D[1][1] != 0.0f);
|
B3_ASSERT(D[1][1] != 0.0f);
|
||||||
@ -580,13 +560,13 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]);
|
inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(A);
|
m_allocator->Free(diagA);
|
||||||
|
|
||||||
// eps0 = dot( filter(b), P * filter(b) )
|
// eps0 = dot( filter(b), P * filter(b) )
|
||||||
b3Vec3* filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 filter_b(m_massCount);
|
||||||
b3Filter(filter_b, b, m_massCount, m_types, m_contacts);
|
b3Filter(filter_b, b, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
b3Vec3* P_filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 P_filter_b(m_massCount);
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
P_filter_b[i][0] = P[i][0] * filter_b[i][0];
|
P_filter_b[i][0] = P[i][0] * filter_b[i][0];
|
||||||
@ -594,22 +574,14 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
P_filter_b[i][2] = P[i][2] * filter_b[i][2];
|
P_filter_b[i][2] = P[i][2] * filter_b[i][2];
|
||||||
}
|
}
|
||||||
|
|
||||||
float32 eps0 = b3Dot(filter_b, P_filter_b, m_massCount);
|
float32 eps0 = b3Dot(filter_b, P_filter_b);
|
||||||
|
|
||||||
m_allocator->Free(P_filter_b);
|
|
||||||
m_allocator->Free(filter_b);
|
|
||||||
m_allocator->Free(P);
|
|
||||||
|
|
||||||
// r = filter(b - Adv)
|
// r = filter(b - Adv)
|
||||||
|
|
||||||
// Adv = A * dv
|
// Adv = A * dv
|
||||||
b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 Adv(m_massCount);
|
||||||
b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
A.Mul(Adv, dv);
|
||||||
|
b3Sub(r, b, Adv);
|
||||||
b3Sub(r, b, Adv, m_massCount);
|
|
||||||
|
|
||||||
m_allocator->Free(Adv);
|
|
||||||
|
|
||||||
b3Filter(r, r, m_massCount, m_types, m_contacts);
|
b3Filter(r, r, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// c = filter(P^-1 * r)
|
// c = filter(P^-1 * r)
|
||||||
@ -620,28 +592,27 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
c[i][2] = inv_P[i][2] * r[i][2];
|
c[i][2] = inv_P[i][2] * r[i][2];
|
||||||
}
|
}
|
||||||
b3Filter(c, c, m_massCount, m_types, m_contacts);
|
b3Filter(c, c, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// epsNew = dot(r, c)
|
// epsNew = dot(r, c)
|
||||||
float32 epsNew = b3Dot(r, c, m_massCount);
|
float32 epsNew = b3Dot(r, c);
|
||||||
|
|
||||||
// [0, 1]
|
// [0, 1]
|
||||||
const float32 kTol = 0.25f;
|
const float32 kTol = 10.0f * B3_EPSILON;
|
||||||
|
|
||||||
// Limit number of iterations to prevent cycling.
|
// Limit number of iterations to prevent cycling.
|
||||||
const u32 kMaxIters = 100;
|
const u32 kMaxIters = 10 * 10;
|
||||||
|
|
||||||
// Main iteration loop.
|
// Main iteration loop.
|
||||||
u32 iter = 0;
|
u32 iter = 0;
|
||||||
while (iter < kMaxIters && epsNew > kTol * kTol * eps0)
|
while (iter < kMaxIters && epsNew > kTol * kTol * eps0)
|
||||||
{
|
{
|
||||||
// q = filter(A * c)
|
// q = filter(A * c)
|
||||||
b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3));
|
b3DenseVec3 q(m_massCount);
|
||||||
|
A.Mul(q, c);
|
||||||
b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
|
||||||
b3Filter(q, q, m_massCount, m_types, m_contacts);
|
b3Filter(q, q, m_massCount, m_types, m_contacts);
|
||||||
|
|
||||||
// alpha = epsNew / dot(c, q)
|
// alpha = epsNew / dot(c, q)
|
||||||
float32 alpha = epsNew / b3Dot(c, q, m_massCount);
|
float32 alpha = epsNew / b3Dot(c, q);
|
||||||
|
|
||||||
// x = x + alpha * c
|
// x = x + alpha * c
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
@ -649,14 +620,12 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
dv[i] = dv[i] + alpha * c[i];
|
dv[i] = dv[i] + alpha * c[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
// r = r - alpha * q
|
// r = r - alpha * q
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
r[i] = r[i] - alpha * q[i];
|
r[i] = r[i] - alpha * q[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(q);
|
|
||||||
|
|
||||||
// s = inv_P * r
|
// s = inv_P * r
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
@ -668,13 +637,13 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
// epsOld = epsNew
|
// epsOld = epsNew
|
||||||
float32 epsOld = epsNew;
|
float32 epsOld = epsNew;
|
||||||
|
|
||||||
// epsNew = dot(r, s)
|
// epsNew = dot(r, s)
|
||||||
epsNew = b3Dot(r, s, m_massCount);
|
epsNew = b3Dot(r, s);
|
||||||
|
|
||||||
// beta = epsNew / epsOld
|
// beta = epsNew / epsOld
|
||||||
float32 beta = epsNew / epsOld;
|
float32 beta = epsNew / epsOld;
|
||||||
|
|
||||||
// c = filter(s + beta * c)
|
// c = filter(s + beta * c)
|
||||||
for (u32 i = 0; i < m_massCount; ++i)
|
for (u32 i = 0; i < m_massCount; ++i)
|
||||||
{
|
{
|
||||||
c[i] = s[i] + beta * c[i];
|
c[i] = s[i] + beta * c[i];
|
||||||
@ -684,15 +653,10 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3
|
|||||||
++iter;
|
++iter;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_allocator->Free(inv_P);
|
|
||||||
m_allocator->Free(s);
|
|
||||||
m_allocator->Free(c);
|
|
||||||
m_allocator->Free(r);
|
|
||||||
|
|
||||||
iterations = iter;
|
iterations = iter;
|
||||||
|
|
||||||
// Residual error
|
// Residual error
|
||||||
// f = A * x - b
|
// f = A * x - b
|
||||||
b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount);
|
A.Mul(e, dv);
|
||||||
b3Sub(e, e, b, m_massCount);
|
b3Sub(e, e, b);
|
||||||
}
|
}
|
Reference in New Issue
Block a user