diff --git a/include/bounce/dynamics/cloth/spring_cloth.h b/include/bounce/dynamics/cloth/spring_cloth.h index 5fa0736..54d414e 100644 --- a/include/bounce/dynamics/cloth/spring_cloth.h +++ b/include/bounce/dynamics/cloth/spring_cloth.h @@ -69,8 +69,17 @@ struct b3SpringClothDef b3Vec3 gravity; }; +enum b3SpringType +{ + e_strechSpring, + e_bendSpring +}; + struct b3Spring { + // Spring type + b3SpringType type; + // Mass 1 u32 i1; @@ -124,6 +133,9 @@ public: // void Initialize(const b3SpringClothDef& def); + // + b3Mesh* GetMesh() const; + // void SetGravity(const b3Vec3& gravity); @@ -136,6 +148,18 @@ public: // b3MassType GetType(u32 i) const; + // Note, the position will be changed only after performing a time step. + void SetPosition(u32 i, const b3Vec3& translation); + + // + const b3Vec3& GetPosition(u32 i) const; + + // + void ApplyForce(u32 i, const b3Vec3& force); + + // + float32 GetKineticEnergy() const; + // void AddShape(b3Shape* shape); @@ -173,6 +197,7 @@ protected: b3Vec3* m_x; b3Vec3* m_v; b3Vec3* m_f; + float32* m_m; float32* m_inv_m; b3Vec3* m_y; b3MassType* m_types; @@ -189,6 +214,11 @@ protected: b3SpringClothStep m_step; }; +inline b3Mesh* b3SpringCloth::GetMesh() const +{ + return m_mesh; +} + inline const b3Vec3& b3SpringCloth::GetGravity() const { return m_gravity; @@ -208,7 +238,57 @@ inline b3MassType b3SpringCloth::GetType(u32 i) const inline void b3SpringCloth::SetType(u32 i, b3MassType type) { B3_ASSERT(i < m_massCount); + if (m_types[i] == type) + { + return; + } + m_types[i] = type; + + m_f[i].SetZero(); + + if (type == e_staticMass) + { + m_v[i].SetZero(); + m_y[i].SetZero(); + + m_contacts[i].lockOnSurface = false; + } +} + +inline void b3SpringCloth::SetPosition(u32 i, const b3Vec3& position) +{ + B3_ASSERT(i < m_massCount); + m_y[i] += position - m_x[i]; +} + +inline const b3Vec3& b3SpringCloth::GetPosition(u32 i) const +{ + B3_ASSERT(i < m_massCount); + return m_x[i]; +} + +inline void b3SpringCloth::ApplyForce(u32 i, const b3Vec3& force) +{ + B3_ASSERT(i < m_massCount); + + if (m_types[i] != e_dynamicMass) + { + return; + } + + m_f[i] += force; +} + +inline float32 b3SpringCloth::GetKineticEnergy() const +{ + float32 E = 0.0f; + for (u32 i = 0; i < m_massCount; ++i) + { + b3Vec3 P = m_m[i] * m_v[i]; + E += b3Dot(P, m_v[i]); + } + return E; } inline u32 b3SpringCloth::GetShapeCount() const diff --git a/include/bounce/dynamics/cloth/spring_solver.h b/include/bounce/dynamics/cloth/spring_solver.h index f521790..29eef81 100644 --- a/include/bounce/dynamics/cloth/spring_solver.h +++ b/include/bounce/dynamics/cloth/spring_solver.h @@ -74,6 +74,7 @@ private: b3Vec3* m_x; b3Vec3* m_v; b3Vec3* m_f; + float32* m_m; float32* m_inv_m; b3Vec3* m_y; b3MassType* m_types; diff --git a/src/bounce/dynamics/cloth/spring_cloth.cpp b/src/bounce/dynamics/cloth/spring_cloth.cpp index 62c12cd..572e861 100644 --- a/src/bounce/dynamics/cloth/spring_cloth.cpp +++ b/src/bounce/dynamics/cloth/spring_cloth.cpp @@ -36,6 +36,7 @@ b3SpringCloth::b3SpringCloth() m_v = nullptr; m_f = nullptr; m_y = nullptr; + m_m = nullptr; m_inv_m = nullptr; m_types = nullptr; m_contacts = nullptr; @@ -57,22 +58,107 @@ b3SpringCloth::~b3SpringCloth() b3Free(m_v); b3Free(m_f); b3Free(m_inv_m); + b3Free(m_m); b3Free(m_y); b3Free(m_types); b3Free(m_contacts); b3Free(m_springs); } +static B3_FORCE_INLINE u32 b3NextIndex(u32 i) +{ + return i + 1 < 3 ? i + 1 : 0; +} + +struct b3UniqueEdge +{ + u32 v1, v2; +}; + +struct b3SharedEdge +{ + u32 v1, v2; + u32 nsv1, nsv2; +}; + +static void b3FindEdges(b3UniqueEdge* uniqueEdges, u32& uniqueCount, b3SharedEdge* sharedEdges, u32& sharedCount, const b3Mesh* m) +{ + uniqueCount = 0; + sharedCount = 0; + + for (u32 i = 0; i < m->triangleCount; ++i) + { + b3Triangle* t1 = m->triangles + i; + u32 i1s[3] = { t1->v1, t1->v2, t1->v3 }; + + for (u32 j1 = 0; j1 < 3; ++j1) + { + u32 t1v1 = i1s[j1]; + + u32 t1v2 = i1s[ b3NextIndex(j1) ]; + + u32 foundCount = 0; + + for (u32 j = i + 1; j < m->triangleCount; ++j) + { + b3Triangle* t2 = m->triangles + j; + u32 i2s[3] = { t2->v1, t2->v2, t2->v3 }; + + for (u32 j2 = 0; j2 < 3; ++j2) + { + u32 t2v1 = i2s[j2]; + u32 t2v2 = i2s[ b3NextIndex(j2) ]; + + if (t1v1 == t2v2 && t1v2 == t2v1) + { + // Shared edge + b3SharedEdge se; + se.v1 = t1v1; + se.v2 = t1v2; + + // Non-shared vertices + u32 k1 = b3NextIndex(j1); + u32 k3 = b3NextIndex(k1); + u32 t1v3 = i1s[k3]; + + u32 k2 = b3NextIndex(j2); + u32 k4 = b3NextIndex(k2); + u32 t2v3 = i2s[k4]; + + se.nsv1 = t1v3; + se.nsv2 = t2v3; + + sharedEdges[sharedCount++] = se; + + ++foundCount; + } + } + } + + if (foundCount == 0) + { + b3UniqueEdge ue; + ue.v1 = t1v1; + ue.v2 = t1v2; + + uniqueEdges[uniqueCount++] = ue; + } + } + } +} + void b3SpringCloth::Initialize(const b3SpringClothDef& def) { B3_ASSERT(def.allocator); B3_ASSERT(def.mesh); + B3_ASSERT(def.density > 0.0f); m_allocator = def.allocator; + m_mesh = def.mesh; - m_gravity = def.gravity; - m_r = def.r; + + m_gravity = def.gravity; const b3Mesh* m = m_mesh; @@ -80,6 +166,7 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) m_x = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3)); m_v = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3)); m_f = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3)); + m_m = (float32*)b3Alloc(m_massCount * sizeof(float32)); m_inv_m = (float32*)b3Alloc(m_massCount * sizeof(float32)); m_y = (b3Vec3*)b3Alloc(m_massCount * sizeof(b3Vec3)); m_types = (b3MassType*)b3Alloc(m_massCount * sizeof(b3MassType)); @@ -96,6 +183,7 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) m_x[i] = m->vertices[i]; m_v[i].SetZero(); m_f[i].SetZero(); + m_m[i] = 0.0f; m_inv_m[i] = 0.0f; m_y[i].SetZero(); m_types[i] = e_staticMass; @@ -111,67 +199,82 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) b3Vec3 p3 = m->vertices[t->v3]; float32 area = b3Area(p1, p2, p3); + + B3_ASSERT(area > B3_EPSILON); + float32 mass = def.density * area; const float32 inv3 = 1.0f / 3.0f; - m_inv_m[t->v1] += inv3 * mass; - m_inv_m[t->v2] += inv3 * mass; - m_inv_m[t->v3] += inv3 * mass; + m_m[t->v1] += inv3 * mass; + m_m[t->v2] += inv3 * mass; + m_m[t->v3] += inv3 * mass; } // Invert for (u32 i = 0; i < m_massCount; ++i) { - if (m_inv_m[i] > 0.0f) - { - m_inv_m[i] = 1.0f / m_inv_m[i]; - m_types[i] = e_dynamicMass; - } + B3_ASSERT(m_m[i] > 0.0f); + m_inv_m[i] = 1.0f / m_m[i]; + m_types[i] = e_dynamicMass; } // Initialize springs - m_springs = (b3Spring*)b3Alloc(3 * m->triangleCount * sizeof(b3Spring)); + u32 edgeCount = 3 * m->triangleCount; + + b3SharedEdge* sharedEdges = (b3SharedEdge*)m_allocator->Allocate(edgeCount * sizeof(b3SharedEdge)); + u32 sharedCount = 0; + + b3UniqueEdge* uniqueEdges = (b3UniqueEdge*)m_allocator->Allocate(edgeCount * sizeof(b3UniqueEdge)); + u32 uniqueCount = 0; + + b3FindEdges(uniqueEdges, uniqueCount, sharedEdges, sharedCount, m); + + u32 springCapacity = uniqueCount + sharedCount; + m_springs = (b3Spring*)b3Alloc(springCapacity * sizeof(b3Spring)); // Streching - for (u32 i = 0; i < m->triangleCount; ++i) + for (u32 i = 0; i < uniqueCount; ++i) { - b3Triangle* t = m->triangles + i; + b3UniqueEdge* e = uniqueEdges + i; - u32 is[3] = { t->v1, t->v2, t->v3 }; - for (u32 j = 0; j < 3; ++j) - { - u32 k = j + 1 < 3 ? j + 1 : 0; + b3Vec3 p1 = m->vertices[e->v1]; + b3Vec3 p2 = m->vertices[e->v2]; - u32 v1 = is[j]; - u32 v2 = is[k]; - - b3Vec3 p1 = m->vertices[v1]; - b3Vec3 p2 = m->vertices[v2]; - - // Skip duplicated spring - bool found = false; - for (u32 s = 0; s < m_springCount; ++s) - { - if ((m_springs[s].i1 == v1 && m_springs[s].i2 == v2) || (m_springs[s].i1 == v2 && m_springs[s].i2 == v1)) - { - found = true; - break; - } - } - - if (found == false) - { - b3Spring* S = m_springs + m_springCount; - S->i1 = v1; - S->i2 = v2; - S->L0 = b3Distance(p1, p2); - S->ks = def.ks; - S->kd = def.kd; - ++m_springCount; - } - } + b3Spring* S = m_springs + m_springCount; + S->type = e_strechSpring; + S->i1 = e->v1; + S->i2 = e->v2; + S->L0 = b3Distance(p1, p2); + S->ks = def.ks; + S->kd = def.kd; + ++m_springCount; } + + // Bending + /* + for (u32 i = 0; i < sharedCount; ++i) + { + b3SharedEdge* e = sharedEdges + i; + + b3Vec3 p1 = m->vertices[e->nsv1]; + b3Vec3 p2 = m->vertices[e->nsv2]; + + b3Spring* S = m_springs + m_springCount; + S->type = e_bendSpring; + S->i1 = e->nsv1; + S->i2 = e->nsv2; + S->L0 = b3Distance(p1, p2); + S->ks = def.kb; + S->kd = def.kd; + ++m_springCount; + } + */ + + m_allocator->Free(uniqueEdges); + m_allocator->Free(sharedEdges); + + B3_ASSERT(m_springCount <= springCapacity); } void b3SpringCloth::AddShape(b3Shape* shape) @@ -205,6 +308,12 @@ void b3SpringCloth::UpdateContacts() { for (u32 i = 0; i < m_massCount; ++i) { + // Static masses can't participate in collisions. + if (m_types[i] == e_staticMass) + { + continue; + } + b3MassContact* c = m_contacts + i; bool wasLocked = c->lockOnSurface; @@ -252,22 +361,18 @@ void b3SpringCloth::UpdateContacts() B3_ASSERT(bestSeparation <= 0.0f); - const b3Shape* shape = m_shapes[bestIndex]; + b3Shape* shape = m_shapes[bestIndex]; float32 s = bestSeparation; - - // Ensure the normal points to the shape - b3Vec3 n = -bestNormal; + b3Vec3 n = bestNormal; // Apply position correction - b3Vec3 dx = s * n; - - m_y[i] += dx; + m_y[i] -= s * n; // Update contact state if (wasLocked) { // Was the contact force attractive? - if (c->Fn > -B3_FORCE_THRESHOLD) + if (c->Fn < B3_FORCE_THRESHOLD) { // Terminate the contact. c->lockOnSurface = false; @@ -327,7 +432,8 @@ void b3SpringCloth::Step(float32 dt) b3SpringSolver solver(solverDef); - // Constraint forces that are required to satisfy the constraints + // Extra constraint forces that should have been applied to satisfy the constraints + // todo Find the applied constraint forces. b3Vec3* forces = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); solver.Solve(forces); @@ -380,7 +486,7 @@ void b3SpringCloth::Draw(b3Draw* draw) const { if (m_contacts[i].lockOnSurface) { - if (m_contacts[i].Fn > -B3_FORCE_THRESHOLD) + if (m_contacts[i].Fn < B3_FORCE_THRESHOLD) { draw->DrawPoint(m_x[i], 6.0f, b3Color_yellow); } diff --git a/src/bounce/dynamics/cloth/spring_solver.cpp b/src/bounce/dynamics/cloth/spring_solver.cpp index 2ca00f6..9dd294b 100644 --- a/src/bounce/dynamics/cloth/spring_solver.cpp +++ b/src/bounce/dynamics/cloth/spring_solver.cpp @@ -41,6 +41,7 @@ b3SpringSolver::b3SpringSolver(const b3SpringSolverDef& def) m_x = m_cloth->m_x; m_v = m_cloth->m_v; m_f = m_cloth->m_f; + m_m = m_cloth->m_m; m_inv_m = m_cloth->m_inv_m; m_y = m_cloth->m_y; m_types = m_cloth->m_types; @@ -59,12 +60,8 @@ b3SpringSolver::~b3SpringSolver() void b3SpringSolver::Solve(b3Vec3* f) { - u32 size = m_massCount; - b3MassType* types = m_types; - u32 spring_size = m_springCount; - - m_Jx = (b3Mat33*)m_allocator->Allocate(spring_size * sizeof(b3Mat33)); - m_Jv = (b3Mat33*)m_allocator->Allocate(spring_size * sizeof(b3Mat33)); + m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); + m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); // Compute and apply spring forces, store their unique derivatives. InitializeSpringForces(); @@ -76,11 +73,11 @@ void b3SpringSolver::Solve(b3Vec3* f) // b = h * (f0 + h * dfdx * v0 + dfdx * y) // - b3Vec3* b = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); Compute_b(b); // - b3Vec3* x = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* x = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); // Solve Ax = b if (b3_enablePrecontitioning) @@ -110,12 +107,150 @@ void b3SpringSolver::Solve(b3Vec3* f) m_Jx = nullptr; } -// This outputs the desired acceleration of the masses in the constrained -// directions. -static B3_FORCE_INLINE void b3Compute_z(b3Vec3* out, - u32 size, const b3MassType* types, const b3MassContact* contacts) +static void b3SetZero(b3Vec3* out, u32 size) { for (u32 i = 0; i < size; ++i) + { + out[i].SetZero(); + } +} + +static void b3Copy(b3Vec3* out, const b3Vec3* v, u32 size) +{ + for (u32 i = 0; i < size; ++i) + { + out[i] = v[i]; + } +} + +static void b3Add(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) +{ + for (u32 i = 0; i < size; ++i) + { + out[i] = a[i] + b[i]; + } +} + +static void b3Sub(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) +{ + for (u32 i = 0; i < size; ++i) + { + out[i] = a[i] - b[i]; + } +} + +static float32 b3Dot(const b3Vec3* a, const b3Vec3* b, u32 size) +{ + float32 result = 0.0f; + for (u32 i = 0; i < size; ++i) + { + result += b3Dot(a[i], b[i]); + } + return result; +} + +#define B3_INDEX(i, j, size) (i + j * size) + +static void b3SetZero(b3Mat33* out, u32 size) +{ + for (u32 i = 0; i < size; ++i) + { + for (u32 j = 0; j < size; ++j) + { + out[B3_INDEX(i, j, size)].SetZero(); + } + } +} + +static void b3Mul(b3Vec3* out, const b3Mat33* M, const b3Vec3* v, u32 size) +{ + for (u32 i = 0; i < size; ++i) + { + out[i].SetZero(); + + for (u32 j = 0; j < size; ++j) + { + out[i] += M[B3_INDEX(i, j, size)] * v[j]; + } + } +} + +static void b3SetZero_Jacobian(b3Mat33* out, u32 springCount) +{ + for (u32 i = 0; i < springCount; ++i) + { + out[i].SetZero(); + } +} + +// J = dfdx or dvdx +static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount, + const b3Mat33* J_ii, const b3Spring* springs, u32 springCount) +{ + b3SetZero(out, massCount); + + for (u32 i = 0; i < springCount; ++i) + { + const b3Spring* S = springs + i; + u32 i1 = S->i1; + u32 i2 = S->i2; + + b3Mat33 J_11 = J_ii[i]; + b3Mat33 J_12 = -J_11; + b3Mat33 J_21 = J_12; + b3Mat33 J_22 = J_11; + + out[i1] += J_11 * v[i1] + J_12 * v[i2]; + out[i2] += J_21 * v[i1] + J_22 * v[i2]; + } +} + +// A = M - h * dfdv - h * h * dfdx +// A * v = (M - h * dfdv - h * h * dfdx) * v = M * v + (-h * dfdv * v) + (-h * h * dfdx * v) +static void b3Mul_A(b3Vec3* out, const b3Vec3* v, u32 massCount, + b3StackAllocator* allocator, + const float32* m, float32 h, const b3Mat33* Jx, const b3Mat33* Jv, const b3Spring* springs, u32 springCount) +{ + // v1 = M * v + b3Vec3* v1 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); + for (u32 i = 0; i < massCount; ++i) + { + v1[i] = m[i] * v[i]; + } + + // v2 = (-h * dfdv * v) + b3Vec3* v2 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); + b3Mul_Jacobian(v2, v, massCount, Jv, springs, springCount); + for (u32 i = 0; i < massCount; ++i) + { + v2[i] *= -h; + } + + // v3 = (-h * h * dfdx * v) + b3Vec3* v3 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); + b3Mul_Jacobian(v3, v, massCount, Jx, springs, springCount); + for (u32 i = 0; i < massCount; ++i) + { + v3[i] *= -h * h; + } + + // v = v1 + v2 + v3 + for (u32 i = 0; i < massCount; ++i) + { + out[i] = v1[i] + v2[i] + v3[i]; + } + + allocator->Free(v3); + allocator->Free(v2); + allocator->Free(v1); +} + +// This outputs the desired acceleration of the masses in the constrained +// directions. +static void b3Compute_z(b3Vec3* out, + u32 massCount, const b3MassType* types, const b3MassContact* contacts) +{ + for (u32 i = 0; i < massCount; ++i) { switch (types[i]) { @@ -144,10 +279,10 @@ static B3_FORCE_INLINE void b3Compute_z(b3Vec3* out, } } -static B3_FORCE_INLINE void b3Filter(b3Vec3* out, - const b3Vec3* v, u32 size, const b3MassType* types, const b3MassContact* contacts) +static void b3Filter(b3Vec3* out, + const b3Vec3* v, u32 massCount, const b3MassType* types, const b3MassContact* contacts) { - for (u32 i = 0; i < size; ++i) + for (u32 i = 0; i < massCount; ++i) { switch (types[i]) { @@ -159,9 +294,9 @@ static B3_FORCE_INLINE void b3Filter(b3Vec3* out, case e_dynamicMass: { if (contacts[i].lockOnSurface) - { + { + // Ensure the prohibited direction points to the solid. b3Vec3 n = contacts[i].n; - b3Mat33 S = b3Mat33_identity - b3Outer(n, n); out[i] = S * v[i]; @@ -180,233 +315,80 @@ static B3_FORCE_INLINE void b3Filter(b3Vec3* out, } } -static B3_FORCE_INLINE void b3SetZero(b3Vec3* out, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i].SetZero(); - } -} - -static B3_FORCE_INLINE void b3Copy(b3Vec3* out, const b3Vec3* v, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i] = v[i]; - } -} - -static B3_FORCE_INLINE void b3Add(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i] = a[i] + b[i]; - } -} - -static B3_FORCE_INLINE void b3Sub(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i] = a[i] - b[i]; - } -} - -static B3_FORCE_INLINE float32 b3Dot(const b3Vec3* a, const b3Vec3* b, u32 size) -{ - float32 result = 0.0f; - for (u32 i = 0; i < size; ++i) - { - result += b3Dot(a[i], b[i]); - } - return result; -} - -#define B3_INDEX(i, j, size) (i + j * size) - -static B3_FORCE_INLINE void b3SetZero(b3Mat33* out, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - for (u32 j = 0; j < size; ++j) - { - out[B3_INDEX(i, j, size)].SetZero(); - } - } -} - -static B3_FORCE_INLINE void b3Mul(b3Vec3* out, const b3Mat33* M, const b3Vec3* v, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i].SetZero(); - - for (u32 j = 0; j < size; ++j) - { - out[i] += M[B3_INDEX(i, j, size)] * v[j]; - } - } -} - -// J = dfdx or dvdx -static B3_FORCE_INLINE void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 mass_size, - const b3Mat33* J_ii, const b3Spring* springs, u32 spring_size) -{ - b3SetZero(out, mass_size); - - for (u32 i = 0; i < spring_size; ++i) - { - const b3Spring* S = springs + i; - u32 i1 = S->i1; - u32 i2 = S->i2; - - b3Mat33 J_11 = J_ii[i]; - b3Mat33 J_12 = -J_11; - b3Mat33 J_21 = J_12; - b3Mat33 J_22 = J_11; - - out[i1] += J_11 * v[i1] + J_12 * v[i2]; - out[i2] += J_21 * v[i1] + J_22 * v[i2]; - } -} - -static B3_FORCE_INLINE void b3SetZero_Jacobian(b3Mat33* out, u32 size) -{ - for (u32 i = 0; i < size; ++i) - { - out[i].SetZero(); - } -} - -// A = M - h * dfdv - h * h * dfdx -// A * v = (M - h * dfdv - h * h * dfdx) * v = M * v + (-h * dfdv * v) + (-h * h * dfdx * v) -static B3_FORCE_INLINE void b3Mul_A(b3Vec3* out, const b3Vec3* v, u32 mass_size, - b3StackAllocator* allocator, - const float32* inv_m, float32 h, const b3Mat33* Jx, const b3Mat33* Jv, const b3Spring* springs, u32 spring_size) -{ - // v1 = M * v - b3Vec3* v1 = (b3Vec3*)allocator->Allocate(mass_size * sizeof(b3Vec3)); - for (u32 i = 0; i < mass_size; ++i) - { - float32 m = inv_m[i] != 0.0f ? 1.0f / inv_m[i] : 0.0f; - v1[i] = m * v[i]; - } - - // v2 = (-h * dfdv * v) - b3Vec3* v2 = (b3Vec3*)allocator->Allocate(mass_size * sizeof(b3Vec3)); - b3Mul_Jacobian(v2, v, mass_size, Jv, springs, spring_size); - for (u32 i = 0; i < mass_size; ++i) - { - v2[i] *= -h; - } - - // v3 = (-h * h * dfdx * v) - b3Vec3* v3 = (b3Vec3*)allocator->Allocate(mass_size * sizeof(b3Vec3)); - b3Mul_Jacobian(v3, v, mass_size, Jx, springs, spring_size); - for (u32 i = 0; i < mass_size; ++i) - { - v3[i] *= -h * h; - } - - // v = v1 + v2 + v3 - for (u32 i = 0; i < mass_size; ++i) - { - out[i] = v1[i] + v2[i] + v3[i]; - } - - allocator->Free(v3); - allocator->Free(v2); - allocator->Free(v1); -} - void b3SpringSolver::InitializeSpringForces() { - u32 size = m_massCount; - u32 spring_size = m_springCount; - // Zero Jacobians - b3SetZero_Jacobian(m_Jx, spring_size); - b3SetZero_Jacobian(m_Jv, spring_size); + b3SetZero_Jacobian(m_Jx, m_springCount); + b3SetZero_Jacobian(m_Jv, m_springCount); // Compute forces and Jacobians - for (u32 i = 0; i < spring_size; ++i) + for (u32 i = 0; i < m_springCount; ++i) { b3Spring* S = m_springs + i; + + b3SpringType type = S->type; + u32 i1 = S->i1; + u32 i2 = S->i2; + float32 L0 = S->L0; + float32 ks = S->ks; + float32 kd = S->kd; - b3Vec3 x1 = m_x[S->i1]; - b3Vec3 v1 = m_v[S->i1]; + b3Vec3 x1 = m_x[i1]; + b3Vec3 v1 = m_v[i1]; - b3Vec3 x2 = m_x[S->i2]; - b3Vec3 v2 = m_v[S->i2]; + b3Vec3 x2 = m_x[i2]; + b3Vec3 v2 = m_v[i2]; - // Strech - b3Vec3 dx = x2 - x1; + // Compute strech forces + b3Vec3 dx = x1 - x2; float32 L = b3Length(dx); - b3Vec3 n = dx; - if (L > 0.0f) - { - n /= L; - } - float32 C = L - S->L0; + B3_ASSERT(L > 0.0f); - // Compute streching forces - b3Vec3 sf1 = -S->ks * C * -n; + b3Vec3 sf1 = -ks * (1.0f - L0 / L) * dx; b3Vec3 sf2 = -sf1; - m_f[S->i1] += sf1; - m_f[S->i2] += sf2; + m_f[i1] += sf1; + m_f[i2] += sf2; - // Compute damping forces - b3Vec3 dv = v2 - v1; + // C * n = 1 - L0 / L + const b3Mat33 I = b3Mat33_identity; + + float32 L3 = L * L * L; - b3Vec3 df1 = -S->kd * -dv; - b3Vec3 df2 = -df1; - - m_f[S->i1] += df1; - m_f[S->i2] += df2; - - b3Mat33 I = b3Mat33_identity; - - // Compute Jx11 - float32 inv_L = L > 0.0f ? 1.0f / L : 0.0f; - - float32 L2 = L * L; - float32 inv_L2 = L2 > 0.0f ? 1.0f / L2 : 0.0f; - - // Hessian - // del^2_C / del_x - b3Mat33 H_11 = inv_L * I + inv_L2 * b3Outer(dx, -n); - - // del_C / del_x * del_C / del_x^T - b3Mat33 JJ_11 = b3Outer(-n, -n); - - b3Mat33 Jx11 = -S->ks * (C * H_11 + JJ_11); + b3Mat33 Jx11 = -ks * ( (1.0f - L0 / L) * I + (L0 / L3) * b3Outer(dx, dx) ); + m_Jx[i] = Jx11; - // Compute Jv11 - b3Mat33 Jv11 = -S->kd * I; + // Compute damping forces + b3Vec3 dv = v1 - v2; + + b3Vec3 df1 = -kd * dv; + b3Vec3 df2 = -df1; + + m_f[i1] += df1; + m_f[i2] += df2; + + b3Mat33 Jv11 = -kd * I; + m_Jv[i] = Jv11; } } void b3SpringSolver::Compute_b(b3Vec3* b) const { - float32 h = m_h; - u32 size = m_massCount; - // Jx_v = dfdx * v - b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - b3Mul_Jacobian(Jx_v, m_v, size, m_Jx, m_springs, m_springCount); + b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); + b3Mul_Jacobian(Jx_v, m_v, m_massCount, m_Jx, m_springs, m_springCount); // Jx_y = dfdx * y - b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - b3Mul_Jacobian(Jx_y, m_y, size, m_Jx, m_springs, m_springCount); + b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); + b3Mul_Jacobian(Jx_y, m_y, m_massCount, m_Jx, m_springs, m_springCount); // b = h * (f0 + h * Jx_v + Jx_y ) - for (u32 i = 0; i < size; ++i) + for (u32 i = 0; i < m_massCount; ++i) { - b[i] = h * (m_f[i] + h * Jx_v[i] + Jx_y[i]); + b[i] = m_h * (m_f[i] + m_h * Jx_v[i] + Jx_y[i]); } m_allocator->Free(Jx_y); @@ -424,7 +406,7 @@ void b3SpringSolver::Solve_MCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3V // Adv = A * dv b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Mul_A(Adv, dv, m_massCount, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); b3Sub(r, b, Adv, m_massCount); b3Filter(r, r, m_massCount, m_types, m_contacts); @@ -452,7 +434,7 @@ void b3SpringSolver::Solve_MCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3V { // q = filter(A * c) b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Mul_A(q, c, m_massCount, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); b3Filter(q, q, m_massCount, m_types, m_contacts); // alpha = epsNew / dot(c, q) @@ -497,68 +479,66 @@ void b3SpringSolver::Solve_MCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3V iterations = iter; // f = A * dv - b - b3Mul_A(e, dv, m_massCount, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); b3Sub(e, e, b, m_massCount); } -static void B3_FORCE_INLINE b3Make_A(b3Mat33* A, - const b3Mat33* Jx, const b3Mat33* Jv, u32 size, - b3StackAllocator* allocator, float32 h, float32* inv_m, - const b3Spring* springs, u32 spring_size) +static void b3Make_A(b3Mat33* A, + const b3Mat33* Jx, const b3Mat33* Jv, u32 massCount, + b3StackAllocator* allocator, float32 h, float32* m, + const b3Spring* springs, u32 springCount) { // A = M - h * dfdv - h * h * dfdx // A = 0 - b3SetZero(A, size); + b3SetZero(A, massCount); // Compute dfdx, dfdv - b3Mat33* dfdx = (b3Mat33*)allocator->Allocate(size * size * sizeof(b3Mat33)); - b3SetZero(dfdx, size); + b3Mat33* dfdx = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33)); + b3SetZero(dfdx, massCount); - b3Mat33* dfdv = (b3Mat33*)allocator->Allocate(size * size * sizeof(b3Mat33)); - b3SetZero(dfdv, size); + b3Mat33* dfdv = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33)); + b3SetZero(dfdv, massCount); - for (u32 i = 0; i < spring_size; ++i) + for (u32 i = 0; i < springCount; ++i) { const b3Spring* S = springs + i; + u32 i1 = S->i1; + u32 i2 = S->i2; b3Mat33 Jx11 = Jx[i]; b3Mat33 Jx12 = -Jx11; b3Mat33 Jx21 = Jx12; b3Mat33 Jx22 = Jx11; - dfdx[B3_INDEX(S->i1, S->i1, size)] += Jx11; - dfdx[B3_INDEX(S->i1, S->i2, size)] += Jx12; - dfdx[B3_INDEX(S->i2, S->i1, size)] += Jx21; - dfdx[B3_INDEX(S->i2, S->i2, size)] += Jx22; + dfdx[B3_INDEX(i1, i1, massCount)] += Jx11; + dfdx[B3_INDEX(i1, i2, massCount)] += Jx12; + dfdx[B3_INDEX(i2, i1, massCount)] += Jx21; + dfdx[B3_INDEX(i2, i2, massCount)] += Jx22; b3Mat33 Jv11 = Jv[i]; b3Mat33 Jv12 = -Jv11; b3Mat33 Jv21 = Jv12; b3Mat33 Jv22 = Jv11; - dfdv[B3_INDEX(S->i1, S->i1, size)] += Jv11; - dfdv[B3_INDEX(S->i1, S->i2, size)] += Jv12; - dfdv[B3_INDEX(S->i2, S->i1, size)] += Jv21; - dfdv[B3_INDEX(S->i2, S->i2, size)] += Jv22; + dfdv[B3_INDEX(i1, i1, massCount)] += Jv11; + dfdv[B3_INDEX(i1, i2, massCount)] += Jv12; + dfdv[B3_INDEX(i2, i1, massCount)] += Jv21; + dfdv[B3_INDEX(i2, i2, massCount)] += Jv22; } // A += M - for (u32 i = 0; i < size; ++i) + for (u32 i = 0; i < massCount; ++i) { - B3_ASSERT(inv_m[i] != 0.0f); - - float32 m = 1.0f / inv_m[i]; - - A[B3_INDEX(i, i, size)] += b3Diagonal(m); + A[B3_INDEX(i, i, massCount)] += b3Diagonal(m[i]); } // A += - h * dfdv - h * h * dfdx - for (u32 i = 0; i < size; ++i) + for (u32 i = 0; i < massCount; ++i) { - for (u32 j = 0; j < size; ++j) + for (u32 j = 0; j < massCount; ++j) { - A[B3_INDEX(i, j, size)] += (-h * dfdv[B3_INDEX(i, j, size)]) + (-h * h * dfdx[B3_INDEX(i, j, size)]); + A[B3_INDEX(i, j, massCount)] += (-h * dfdv[B3_INDEX(i, j, massCount)]) + (-h * h * dfdx[B3_INDEX(i, j, massCount)]); } } @@ -568,33 +548,29 @@ static void B3_FORCE_INLINE b3Make_A(b3Mat33* A, void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3Vec3* b) const { - u32 size = m_massCount; - b3MassType* types = m_types; - u32 spring_size = m_springCount; + b3Vec3* r = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Vec3* r = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* c = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Vec3* c = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* s = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Vec3* s = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - - b3Vec3* inv_P = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* inv_P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); // dv = z - b3Compute_z(dv, size, types, m_contacts); + b3Compute_z(dv, m_massCount, m_types, m_contacts); // P = diag(A)^-1 - b3Vec3* P = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); // A = M - h * dfdv - h * h * dfdx - b3Mat33* A = (b3Mat33*)m_allocator->Allocate(size * size * sizeof(b3Mat33)); - b3Make_A(A, m_Jx, m_Jv, size, m_allocator, m_h, m_inv_m, m_springs, m_springCount); + b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33)); + b3Make_A(A, m_Jx, m_Jv, m_massCount, m_allocator, m_h, m_m, m_springs, m_springCount); // Compute P, P^-1 // @todo Optimize so we don't need to compute A. - for (u32 i = 0; i < size; ++i) + for (u32 i = 0; i < m_massCount; ++i) { - b3Mat33 D = A[B3_INDEX(i, i, size)]; + b3Mat33 D = A[B3_INDEX(i, i, m_massCount)]; B3_ASSERT(D[0][0] != 0.0f); B3_ASSERT(D[1][1] != 0.0f); @@ -607,18 +583,18 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 m_allocator->Free(A); // eps0 = dot( filter(b), P * filter(b) ) - b3Vec3* filter_b = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - b3Filter(filter_b, b, size, types, m_contacts); + b3Vec3* filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); + b3Filter(filter_b, b, m_massCount, m_types, m_contacts); - b3Vec3* P_filter_b = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - for (u32 i = 0; i < size; ++i) + b3Vec3* P_filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); + for (u32 i = 0; i < m_massCount; ++i) { P_filter_b[i][0] = P[i][0] * filter_b[i][0]; P_filter_b[i][1] = P[i][1] * filter_b[i][1]; P_filter_b[i][2] = P[i][2] * filter_b[i][2]; } - float32 eps0 = b3Dot(filter_b, P_filter_b, size); + float32 eps0 = b3Dot(filter_b, P_filter_b, m_massCount); m_allocator->Free(P_filter_b); m_allocator->Free(filter_b); @@ -627,14 +603,14 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 // r = filter(b - Adv) // Adv = A * dv - b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); - b3Mul_A(Adv, dv, size, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); + b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); - b3Sub(r, b, Adv, size); + b3Sub(r, b, Adv, m_massCount); m_allocator->Free(Adv); - b3Filter(r, r, size, types, m_contacts); + b3Filter(r, r, m_massCount, m_types, m_contacts); // c = filter(P^-1 * r) for (u32 i = 0; i < m_massCount; ++i) @@ -643,10 +619,10 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 c[i][1] = inv_P[i][1] * r[i][1]; c[i][2] = inv_P[i][2] * r[i][2]; } - b3Filter(c, c, size, types, m_contacts); + b3Filter(c, c, m_massCount, m_types, m_contacts); // epsNew = dot(r, c) - float32 epsNew = b3Dot(r, c, size); + float32 epsNew = b3Dot(r, c, m_massCount); // [0, 1] const float32 kTol = 0.25f; @@ -659,13 +635,13 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 while (iter < kMaxIters && epsNew > kTol * kTol * eps0) { // q = filter(A * c) - b3Vec3* q = (b3Vec3*)m_allocator->Allocate(size * sizeof(b3Vec3)); + b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); - b3Mul_A(q, c, size, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); - b3Filter(q, q, size, types, m_contacts); + b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Filter(q, q, m_massCount, m_types, m_contacts); // alpha = epsNew / dot(c, q) - float32 alpha = epsNew / b3Dot(c, q, size); + float32 alpha = epsNew / b3Dot(c, q, m_massCount); // x = x + alpha * c for (u32 i = 0; i < m_massCount; ++i) @@ -693,7 +669,7 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 float32 epsOld = epsNew; // epsNew = dot(r, s) - epsNew = b3Dot(r, s, size); + epsNew = b3Dot(r, s, m_massCount); // beta = epsNew / epsOld float32 beta = epsNew / epsOld; @@ -703,7 +679,7 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 { c[i] = s[i] + beta * c[i]; } - b3Filter(c, c, size, types, m_contacts); + b3Filter(c, c, m_massCount, m_types, m_contacts); ++iter; } @@ -717,6 +693,6 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 // Residual error // f = A * x - b - b3Mul_A(e, dv, size, m_allocator, m_inv_m, m_h, m_Jx, m_Jv, m_springs, spring_size); - b3Sub(e, e, b, size); + b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); + b3Sub(e, e, b, m_massCount); } \ No newline at end of file