improve CG performance using CSR matrix format
This commit is contained in:
		| @@ -107,10 +107,10 @@ enum b3MassType | |||||||
| //  | //  | ||||||
| struct b3MassContact | struct b3MassContact | ||||||
| { | { | ||||||
|  | 	u32 j; | ||||||
| 	b3Vec3 n, t1, t2; | 	b3Vec3 n, t1, t2; | ||||||
| 	float32 Fn, Ft1, Ft2; | 	float32 Fn, Ft1, Ft2; | ||||||
| 	u32 j; | 	bool lockN, lockT1, lockT2; | ||||||
| 	bool lockOnSurface, slideOnSurface; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Time step statistics | // Time step statistics | ||||||
| @@ -252,7 +252,7 @@ inline void b3SpringCloth::SetType(u32 i, b3MassType type) | |||||||
| 		m_v[i].SetZero(); | 		m_v[i].SetZero(); | ||||||
| 		m_y[i].SetZero(); | 		m_y[i].SetZero(); | ||||||
| 		 | 		 | ||||||
| 		m_contacts[i].lockOnSurface = false; | 		m_contacts[i].lockN = false; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -25,6 +25,9 @@ | |||||||
| class b3SpringCloth; | class b3SpringCloth; | ||||||
| class b3StackAllocator; | class b3StackAllocator; | ||||||
|  |  | ||||||
|  | struct b3DenseVec3; | ||||||
|  | struct b3SparseMat33; | ||||||
|  |  | ||||||
| struct b3MassContact; | struct b3MassContact; | ||||||
| struct b3Spring; | struct b3Spring; | ||||||
|  |  | ||||||
| @@ -43,25 +46,25 @@ public: | |||||||
|  |  | ||||||
| 	~b3SpringSolver(); | 	~b3SpringSolver(); | ||||||
|  |  | ||||||
| 	void Solve(b3Vec3* constraintForces); | 	void Solve(b3DenseVec3& extraForces); | ||||||
|  |  | ||||||
| 	u32 GetIterations() const; | 	u32 GetIterations() const; | ||||||
| private: | private: | ||||||
| 	// Apply internal forces and store their unique derivatives. | 	// Apply internal forces and store their unique derivatives. | ||||||
| 	void InitializeSpringForces(); | 	void ApplySpringForces(); | ||||||
|  |  | ||||||
| 	// Initialize b, from Ax = b | 	// Compute A and b in Ax = b | ||||||
| 	void Compute_b(b3Vec3* b) const; | 	void Compute_A_b(b3SparseMat33& A, b3DenseVec3& b) const; | ||||||
|  |  | ||||||
| 	// Solve Ax = b using the Modified Conjugate Gradient (MCG).  | 	// Solve Ax = b using the Modified Conjugate Gradient (MCG).  | ||||||
| 	// Output x and the residual error f. | 	// Output x and the residual error f. | ||||||
| 	void Solve_MCG(b3Vec3* x, b3Vec3* f, u32& iterations, const b3Vec3* b) const; | 	void Solve_MCG(b3DenseVec3& x, const b3SparseMat33& A, b3DenseVec3& f, u32& iterations, const b3DenseVec3& b) const; | ||||||
|  |  | ||||||
| 	// Solve Ax = b using MCG with Jacobi preconditioning. | 	// Solve Ax = b using MCG with Jacobi preconditioning. | ||||||
| 	// Output x and the residual error f. | 	// Output x and the residual error f. | ||||||
| 	// This method is slower than MCG because we have to compute the preconditioning  | 	// This method is slower than MCG because we have to compute the preconditioning  | ||||||
| 	// matrix P, but it can improve convergence. | 	// matrix P, but it can improve convergence. | ||||||
| 	void Solve_MPCG(b3Vec3* x, b3Vec3* f, u32& iterations, const b3Vec3* b) const; | 	void Solve_MPCG(b3DenseVec3& x, const b3SparseMat33& A, b3DenseVec3& f, u32& iterations, const b3DenseVec3& b) const; | ||||||
|  |  | ||||||
| 	b3SpringCloth * m_cloth; | 	b3SpringCloth * m_cloth; | ||||||
| 	float32 m_h; | 	float32 m_h; | ||||||
|   | |||||||
| @@ -18,12 +18,18 @@ | |||||||
|  |  | ||||||
| #include <bounce/dynamics/cloth/spring_cloth.h> | #include <bounce/dynamics/cloth/spring_cloth.h> | ||||||
| #include <bounce/dynamics/cloth/spring_solver.h> | #include <bounce/dynamics/cloth/spring_solver.h> | ||||||
|  | #include <bounce/dynamics/cloth/dense_vec3.h> | ||||||
|  | #include <bounce/dynamics/cloth/sparse_mat33.h> | ||||||
| #include <bounce/dynamics/shapes/shape.h> | #include <bounce/dynamics/shapes/shape.h> | ||||||
| #include <bounce/collision/shapes/mesh.h> | #include <bounce/collision/shapes/mesh.h> | ||||||
| #include <bounce/common/memory/stack_allocator.h> | #include <bounce/common/memory/stack_allocator.h> | ||||||
|  |  | ||||||
| #define B3_FORCE_THRESHOLD (0.1f) | #define B3_FORCE_THRESHOLD (0.1f) | ||||||
|  |  | ||||||
|  | #define B3_CLOTH_BENDING 0 | ||||||
|  |  | ||||||
|  | #define B3_CLOTH_FRICTION 0 | ||||||
|  |  | ||||||
| b3SpringCloth::b3SpringCloth() | b3SpringCloth::b3SpringCloth() | ||||||
| { | { | ||||||
| 	m_allocator = nullptr; | 	m_allocator = nullptr; | ||||||
| @@ -95,7 +101,7 @@ static void b3FindEdges(b3UniqueEdge* uniqueEdges, u32& uniqueCount, b3SharedEdg | |||||||
| 		{ | 		{ | ||||||
| 			u32 t1v1 = i1s[j1]; | 			u32 t1v1 = i1s[j1]; | ||||||
|  |  | ||||||
| 			u32 t1v2 = i1s[ b3NextIndex(j1) ]; | 			u32 t1v2 = i1s[b3NextIndex(j1)]; | ||||||
|  |  | ||||||
| 			u32 foundCount = 0; | 			u32 foundCount = 0; | ||||||
|  |  | ||||||
| @@ -107,7 +113,7 @@ static void b3FindEdges(b3UniqueEdge* uniqueEdges, u32& uniqueCount, b3SharedEdg | |||||||
| 				for (u32 j2 = 0; j2 < 3; ++j2) | 				for (u32 j2 = 0; j2 < 3; ++j2) | ||||||
| 				{ | 				{ | ||||||
| 					u32 t2v1 = i2s[j2]; | 					u32 t2v1 = i2s[j2]; | ||||||
| 					u32 t2v2 = i2s[ b3NextIndex(j2) ]; | 					u32 t2v2 = i2s[b3NextIndex(j2)]; | ||||||
|  |  | ||||||
| 					if (t1v1 == t2v2 && t1v2 == t2v1) | 					if (t1v1 == t2v2 && t1v2 == t2v1) | ||||||
| 					{ | 					{ | ||||||
| @@ -177,8 +183,9 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) | |||||||
| 		m_contacts[i].Fn = 0.0f; | 		m_contacts[i].Fn = 0.0f; | ||||||
| 		m_contacts[i].Ft1 = 0.0f; | 		m_contacts[i].Ft1 = 0.0f; | ||||||
| 		m_contacts[i].Ft2 = 0.0f; | 		m_contacts[i].Ft2 = 0.0f; | ||||||
| 		m_contacts[i].lockOnSurface = false; | 		m_contacts[i].lockN = false; | ||||||
| 		m_contacts[i].slideOnSurface = false; | 		m_contacts[i].lockT1 = false; | ||||||
|  | 		m_contacts[i].lockT2 = false; | ||||||
|  |  | ||||||
| 		m_x[i] = m->vertices[i]; | 		m_x[i] = m->vertices[i]; | ||||||
| 		m_v[i].SetZero(); | 		m_v[i].SetZero(); | ||||||
| @@ -251,8 +258,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) | |||||||
| 		++m_springCount; | 		++m_springCount; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | #if B3_CLOTH_BENDING == 1 | ||||||
| 	// Bending | 	// Bending | ||||||
| 	/* |  | ||||||
| 	for (u32 i = 0; i < sharedCount; ++i) | 	for (u32 i = 0; i < sharedCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		b3SharedEdge* e = sharedEdges + i; | 		b3SharedEdge* e = sharedEdges + i; | ||||||
| @@ -269,7 +276,8 @@ void b3SpringCloth::Initialize(const b3SpringClothDef& def) | |||||||
| 		S->kd = def.kd; | 		S->kd = def.kd; | ||||||
| 		++m_springCount; | 		++m_springCount; | ||||||
| 	} | 	} | ||||||
| 	*/ |  | ||||||
|  | #endif // #if B3_CLOTH_BENDING | ||||||
|  |  | ||||||
| 	m_allocator->Free(uniqueEdges); | 	m_allocator->Free(uniqueEdges); | ||||||
| 	m_allocator->Free(sharedEdges); | 	m_allocator->Free(sharedEdges); | ||||||
| @@ -316,8 +324,7 @@ void b3SpringCloth::UpdateContacts() | |||||||
|  |  | ||||||
| 		b3MassContact* c = m_contacts + i; | 		b3MassContact* c = m_contacts + i; | ||||||
|  |  | ||||||
| 		bool wasLocked = c->lockOnSurface; | 		bool wasLockedN = c->lockN; | ||||||
| 		bool wasSliding = c->slideOnSurface; |  | ||||||
|  |  | ||||||
| 		b3Sphere s1; | 		b3Sphere s1; | ||||||
| 		s1.vertex = m_x[i]; | 		s1.vertex = m_x[i]; | ||||||
| @@ -354,8 +361,9 @@ void b3SpringCloth::UpdateContacts() | |||||||
| 			c->Fn = 0.0f; | 			c->Fn = 0.0f; | ||||||
| 			c->Ft1 = 0.0f; | 			c->Ft1 = 0.0f; | ||||||
| 			c->Ft2 = 0.0f; | 			c->Ft2 = 0.0f; | ||||||
| 			c->lockOnSurface = false; | 			c->lockN = false; | ||||||
| 			c->slideOnSurface = false; | 			c->lockT1 = false; | ||||||
|  | 			c->lockT2 = false; | ||||||
| 			continue; | 			continue; | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| @@ -369,41 +377,100 @@ void b3SpringCloth::UpdateContacts() | |||||||
| 		m_y[i] -= s * n; | 		m_y[i] -= s * n; | ||||||
|  |  | ||||||
| 		// Update contact state | 		// Update contact state | ||||||
| 		if (wasLocked) | 		if (wasLockedN) | ||||||
| 		{ | 		{ | ||||||
| 			// Was the contact force attractive? | 			// Was the contact force attractive? | ||||||
| 			if (c->Fn < B3_FORCE_THRESHOLD) | 			if (c->Fn < B3_FORCE_THRESHOLD) | ||||||
| 			{ | 			{ | ||||||
| 				// Terminate the contact. | 				// Terminate the contact. | ||||||
| 				c->lockOnSurface = false; | 				c->Fn = 0.0f; | ||||||
|  | 				c->Ft1 = 0.0f; | ||||||
|  | 				c->Ft2 = 0.0f; | ||||||
|  | 				c->lockN = false; | ||||||
|  | 				c->lockT1 = false; | ||||||
|  | 				c->lockT2 = false; | ||||||
| 				continue; | 				continue; | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			// Since the contact force was repulsive  | 			// Since the contact force was repulsive  | ||||||
| 			// maintain the acceleration constraint. | 			// maintain the normal acceleration constraint. | ||||||
| 			c->n = n; |  | ||||||
| 			c->j = bestIndex; | 			c->j = bestIndex; | ||||||
| 			c->lockOnSurface = true; | 			c->n = n; | ||||||
| 		} | 		} | ||||||
| 		else | 		else | ||||||
| 		{ | 		{ | ||||||
| 			// The contact has began. | 			// The contact has began. | ||||||
|  | 			c->j = bestIndex; | ||||||
| 			c->n = n; | 			c->n = n; | ||||||
| 			c->Fn = 0.0f; | 			c->Fn = 0.0f; | ||||||
| 			c->Ft1 = 0.0f; | 			c->Ft1 = 0.0f; | ||||||
| 			c->Ft2 = 0.0f; | 			c->Ft2 = 0.0f; | ||||||
| 			c->j = bestIndex; | 			c->lockN = true; | ||||||
| 			c->lockOnSurface = true; | 		} | ||||||
|  |  | ||||||
|  | #if B3_CLOTH_FRICTION == 1 | ||||||
|  |  | ||||||
|  | 		// Apply friction impulses | ||||||
|  |  | ||||||
|  | 		// Note without a friction force, the tangential acceleration won't be  | ||||||
|  | 		// removed. | ||||||
|  |  | ||||||
| 		// Relative velocity | 		// Relative velocity | ||||||
| 		b3Vec3 dv = m_v[i]; | 		b3Vec3 dv = m_v[i]; | ||||||
|  |  | ||||||
| 		b3MakeTangents(c->t1, c->t2, dv, n); | 		b3MakeTangents(c->t1, c->t2, dv, n); | ||||||
| 			c->slideOnSurface = false; |  | ||||||
|  |  | ||||||
| 			continue; | 		// Coefficients of friction for the solid | ||||||
|  | 		const float32 uk = shape->GetFriction(); | ||||||
|  | 		const float32 us = 2.0f * uk; | ||||||
|  |  | ||||||
|  | 		float32 dvn = b3Dot(dv, n); | ||||||
|  | 		float32 normalImpulse = -m_inv_m[i] * dvn; | ||||||
|  |  | ||||||
|  | 		b3Vec3 ts[2]; | ||||||
|  | 		ts[0] = c->t1; | ||||||
|  | 		ts[1] = c->t2; | ||||||
|  |  | ||||||
|  | 		bool lockT[2]; | ||||||
|  |  | ||||||
|  | 		for (u32 k = 0; k < 2; ++k) | ||||||
|  | 		{ | ||||||
|  | 			b3Vec3 t = ts[k]; | ||||||
|  |  | ||||||
|  | 			float32 dvt = b3Dot(dv, t); | ||||||
|  | 			float32 tangentImpulse = -m_inv_m[i] * dvt; | ||||||
|  |  | ||||||
|  | 			float32 maxStaticImpulse = us * normalImpulse; | ||||||
|  | 			if (tangentImpulse * tangentImpulse > maxStaticImpulse * maxStaticImpulse) | ||||||
|  | 			{ | ||||||
|  | 				lockT[k] = false; | ||||||
|  |  | ||||||
|  | 				// Dynamic friction | ||||||
|  | 				float32 maxDynamicImpulse = uk * normalImpulse; | ||||||
|  | 				if (tangentImpulse * tangentImpulse > maxDynamicImpulse * maxDynamicImpulse) | ||||||
|  | 				{ | ||||||
|  | 					b3Vec3 P = tangentImpulse * t; | ||||||
|  |  | ||||||
|  | 					m_v[i] += m_m[i] * P; | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  | 			else | ||||||
|  | 			{ | ||||||
|  | 				lockT[k] = true; | ||||||
|  |  | ||||||
|  | 				// Static friction | ||||||
|  | 				b3Vec3 P = tangentImpulse * t; | ||||||
|  |  | ||||||
|  | 				m_v[i] += m_m[i] * P; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		c->lockT1 = lockT[0]; | ||||||
|  | 		c->lockT2 = lockT[1]; | ||||||
|  |  | ||||||
|  | #endif // #if B3_CLOTH_FRICTION | ||||||
|  |  | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| void b3SpringCloth::Step(float32 dt) | void b3SpringCloth::Step(float32 dt) | ||||||
| @@ -426,6 +493,7 @@ void b3SpringCloth::Step(float32 dt) | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Integrate | 	// Integrate | ||||||
|  |  | ||||||
| 	b3SpringSolverDef solverDef; | 	b3SpringSolverDef solverDef; | ||||||
| 	solverDef.cloth = this; | 	solverDef.cloth = this; | ||||||
| 	solverDef.dt = dt; | 	solverDef.dt = dt; | ||||||
| @@ -434,7 +502,7 @@ void b3SpringCloth::Step(float32 dt) | |||||||
|  |  | ||||||
| 	// Extra constraint forces that should have been applied to satisfy the constraints | 	// Extra constraint forces that should have been applied to satisfy the constraints | ||||||
| 	// todo Find the applied constraint forces. | 	// todo Find the applied constraint forces. | ||||||
| 	b3Vec3* forces = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 forces(m_massCount); | ||||||
|  |  | ||||||
| 	solver.Solve(forces); | 	solver.Solve(forces); | ||||||
|  |  | ||||||
| @@ -445,19 +513,17 @@ void b3SpringCloth::Step(float32 dt) | |||||||
| 	{ | 	{ | ||||||
| 		b3Vec3 force = forces[i]; | 		b3Vec3 force = forces[i]; | ||||||
|  |  | ||||||
| 		b3MassContact* contact = m_contacts + i; | 		b3MassContact* c = m_contacts + i; | ||||||
|  |  | ||||||
| 		// Signed normal force magnitude | 		// Signed normal force magnitude | ||||||
| 		contact->Fn = b3Dot(force, contact->n); | 		c->Fn = b3Dot(force, c->n); | ||||||
|  |  | ||||||
| 		// Signed tangent forces magnitude | 		// Signed tangent forces magnitude | ||||||
| 		contact->Ft1 = b3Dot(force, contact->t1); | 		c->Ft1 = b3Dot(force, c->t1); | ||||||
| 		contact->Ft2 = b3Dot(force, contact->t2); | 		c->Ft2 = b3Dot(force, c->t2); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	m_allocator->Free(forces); | 	// Clear position alteration | ||||||
|  |  | ||||||
| 	// Clear position correction |  | ||||||
| 	for (u32 i = 0; i < m_massCount; ++i) | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		m_y[i].SetZero(); | 		m_y[i].SetZero(); | ||||||
| @@ -484,7 +550,7 @@ void b3SpringCloth::Draw(b3Draw* draw) const | |||||||
|  |  | ||||||
| 	for (u32 i = 0; i < m->vertexCount; ++i) | 	for (u32 i = 0; i < m->vertexCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		if (m_contacts[i].lockOnSurface) | 		if (m_contacts[i].lockN) | ||||||
| 		{ | 		{ | ||||||
| 			if (m_contacts[i].Fn < B3_FORCE_THRESHOLD) | 			if (m_contacts[i].Fn < B3_FORCE_THRESHOLD) | ||||||
| 			{ | 			{ | ||||||
|   | |||||||
| @@ -18,6 +18,8 @@ | |||||||
|  |  | ||||||
| #include <bounce/dynamics/cloth/spring_solver.h> | #include <bounce/dynamics/cloth/spring_solver.h> | ||||||
| #include <bounce/dynamics/cloth/spring_cloth.h> | #include <bounce/dynamics/cloth/spring_cloth.h> | ||||||
|  | #include <bounce/dynamics/cloth/dense_vec3.h> | ||||||
|  | #include <bounce/dynamics/cloth/sparse_mat33.h> | ||||||
| #include <bounce/common/memory/stack_allocator.h> | #include <bounce/common/memory/stack_allocator.h> | ||||||
|  |  | ||||||
| // Here, we solve Ax = b using the Modified Conjugate Gradient method. | // Here, we solve Ax = b using the Modified Conjugate Gradient method. | ||||||
| @@ -58,13 +60,14 @@ b3SpringSolver::~b3SpringSolver() | |||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void b3SpringSolver::Solve(b3Vec3* f) | void b3SpringSolver::Solve(b3DenseVec3& f) | ||||||
| { | { | ||||||
|  | 	// | ||||||
| 	m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); | 	m_Jx = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); | ||||||
| 	m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); | 	m_Jv = (b3Mat33*)m_allocator->Allocate(m_springCount * sizeof(b3Mat33)); | ||||||
|  |  | ||||||
| 	// Compute and apply spring forces, store their unique derivatives. | 	// Apply spring forces. Also, store their unique derivatives. | ||||||
| 	InitializeSpringForces(); | 	ApplySpringForces(); | ||||||
|  |  | ||||||
| 	// Integrate | 	// Integrate | ||||||
|  |  | ||||||
| @@ -72,21 +75,31 @@ void b3SpringSolver::Solve(b3Vec3* f) | |||||||
| 	// A = M - h * dfdv - h * h * dfdx | 	// A = M - h * dfdv - h * h * dfdx | ||||||
| 	// b = h * (f0 + h * dfdx * v0 + dfdx * y)  | 	// b = h * (f0 + h * dfdx * v0 + dfdx * y)  | ||||||
|  |  | ||||||
| 	// | 	// Allocate matrix memory for the worst case. | ||||||
| 	b3Vec3* b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	u32 nzCount = m_massCount * m_massCount; | ||||||
| 	Compute_b(b); | 	b3Mat33* nzElements = (b3Mat33*)m_allocator->Allocate(nzCount * sizeof(b3Mat33)); | ||||||
|  | 	u32* nzColumns = (u32*)m_allocator->Allocate(nzCount * sizeof(u32)); | ||||||
|  | 	u32* rowPtrs = (u32*)m_allocator->Allocate((m_massCount + 1) * sizeof(u32)); | ||||||
|  |  | ||||||
|  | 	{ | ||||||
|  | 		b3SparseMat33 A(m_massCount, m_massCount, nzCount, nzElements, rowPtrs, nzColumns); | ||||||
|  |  | ||||||
| 		// | 		// | ||||||
| 	b3Vec3* x = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 		b3DenseVec3 b(m_massCount); | ||||||
|  | 		 | ||||||
|  | 		// | ||||||
|  | 		Compute_A_b(A, b); | ||||||
|  |  | ||||||
|  | 		// x | ||||||
|  | 		b3DenseVec3 x(m_massCount); | ||||||
|  |  | ||||||
| 	// Solve Ax = b |  | ||||||
| 		if (b3_enablePrecontitioning) | 		if (b3_enablePrecontitioning) | ||||||
| 		{ | 		{ | ||||||
| 		Solve_MPCG(x, f, m_iterations, b); | 			Solve_MPCG(x, A, f, m_iterations, b); | ||||||
| 		} | 		} | ||||||
| 		else | 		else | ||||||
| 		{ | 		{ | ||||||
| 		Solve_MCG(x, f, m_iterations, b); | 			Solve_MCG(x, A, f, m_iterations, b); | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		// Update state | 		// Update state | ||||||
| @@ -97,9 +110,11 @@ void b3SpringSolver::Solve(b3Vec3* f) | |||||||
| 			// dx = h * (v0 + dv) + y = h * v1 + y | 			// dx = h * (v0 + dv) + y = h * v1 + y | ||||||
| 			m_x[i] += m_h * m_v[i] + m_y[i]; | 			m_x[i] += m_h * m_v[i] + m_y[i]; | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	m_allocator->Free(x); | 	m_allocator->Free(rowPtrs); | ||||||
| 	m_allocator->Free(b); | 	m_allocator->Free(nzColumns); | ||||||
|  | 	m_allocator->Free(nzElements); | ||||||
|  |  | ||||||
| 	m_allocator->Free(m_Jv); | 	m_allocator->Free(m_Jv); | ||||||
| 	m_allocator->Free(m_Jx); | 	m_allocator->Free(m_Jx); | ||||||
| @@ -107,6 +122,8 @@ void b3SpringSolver::Solve(b3Vec3* f) | |||||||
| 	m_Jx = nullptr; | 	m_Jx = nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #define B3_INDEX(i, j, size) (i + j * size) | ||||||
|  |  | ||||||
| static void b3SetZero(b3Vec3* out, u32 size) | static void b3SetZero(b3Vec3* out, u32 size) | ||||||
| { | { | ||||||
| 	for (u32 i = 0; i < size; ++i) | 	for (u32 i = 0; i < size; ++i) | ||||||
| @@ -115,63 +132,11 @@ static void b3SetZero(b3Vec3* out, u32 size) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| static void b3Copy(b3Vec3* out, const b3Vec3* v, u32 size) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < size; ++i) |  | ||||||
| 	{ |  | ||||||
| 		out[i] = v[i]; |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void b3Add(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < size; ++i) |  | ||||||
| 	{ |  | ||||||
| 		out[i] = a[i] + b[i]; |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void b3Sub(b3Vec3* out, const b3Vec3* a, const b3Vec3* b, u32 size) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < size; ++i) |  | ||||||
| 	{ |  | ||||||
| 		out[i] = a[i] - b[i]; |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static float32 b3Dot(const b3Vec3* a, const b3Vec3* b, u32 size) |  | ||||||
| { |  | ||||||
| 	float32 result = 0.0f; |  | ||||||
| 	for (u32 i = 0; i < size; ++i) |  | ||||||
| 	{ |  | ||||||
| 		result += b3Dot(a[i], b[i]); |  | ||||||
| 	} |  | ||||||
| 	return result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #define B3_INDEX(i, j, size) (i + j * size) |  | ||||||
|  |  | ||||||
| static void b3SetZero(b3Mat33* out, u32 size) | static void b3SetZero(b3Mat33* out, u32 size) | ||||||
| { | { | ||||||
| 	for (u32 i = 0; i < size; ++i) | 	for (u32 i = 0; i < size * size; ++i) | ||||||
| 	{ |  | ||||||
| 		for (u32 j = 0; j < size; ++j) |  | ||||||
| 		{ |  | ||||||
| 			out[B3_INDEX(i, j, size)].SetZero(); |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void b3Mul(b3Vec3* out, const b3Mat33* M, const b3Vec3* v, u32 size) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < size; ++i) |  | ||||||
| 	{ | 	{ | ||||||
| 		out[i].SetZero(); | 		out[i].SetZero(); | ||||||
|  |  | ||||||
| 		for (u32 j = 0; j < size; ++j) |  | ||||||
| 		{ |  | ||||||
| 			out[i] += M[B3_INDEX(i, j, size)] * v[j]; |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -205,117 +170,7 @@ static void b3Mul_Jacobian(b3Vec3* out, const b3Vec3* v, u32 massCount, | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| // A = M - h * dfdv - h * h * dfdx | void b3SpringSolver::ApplySpringForces() | ||||||
| // A * v = (M - h * dfdv - h * h * dfdx) * v = M * v + (-h * dfdv * v) + (-h * h * dfdx * v)  |  | ||||||
| static void b3Mul_A(b3Vec3* out, const b3Vec3* v, u32 massCount, |  | ||||||
| 	b3StackAllocator* allocator, |  | ||||||
| 	const float32* m, float32 h, const b3Mat33* Jx, const b3Mat33* Jv, const b3Spring* springs, u32 springCount) |  | ||||||
| { |  | ||||||
| 	// v1 = M * v |  | ||||||
| 	b3Vec3* v1 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		v1[i] = m[i] * v[i]; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// v2 = (-h * dfdv * v) |  | ||||||
| 	b3Vec3* v2 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); |  | ||||||
| 	b3Mul_Jacobian(v2, v, massCount, Jv, springs, springCount); |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		v2[i] *= -h; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// v3 = (-h * h * dfdx * v) |  | ||||||
| 	b3Vec3* v3 = (b3Vec3*)allocator->Allocate(massCount * sizeof(b3Vec3)); |  | ||||||
| 	b3Mul_Jacobian(v3, v, massCount, Jx, springs, springCount); |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		v3[i] *= -h * h; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// v = v1 + v2 + v3 |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		out[i] = v1[i] + v2[i] + v3[i]; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	allocator->Free(v3); |  | ||||||
| 	allocator->Free(v2); |  | ||||||
| 	allocator->Free(v1); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // This outputs the desired acceleration of the masses in the constrained  |  | ||||||
| // directions. |  | ||||||
| static void b3Compute_z(b3Vec3* out, |  | ||||||
| 	u32 massCount, const b3MassType* types, const b3MassContact* contacts) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		switch (types[i]) |  | ||||||
| 		{ |  | ||||||
| 		case e_staticMass: |  | ||||||
| 		{ |  | ||||||
| 			out[i].SetZero(); |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		case e_dynamicMass: |  | ||||||
| 		{ |  | ||||||
| 			if (contacts[i].lockOnSurface) |  | ||||||
| 			{ |  | ||||||
| 				out[i].SetZero(); |  | ||||||
| 				break; |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			out[i].SetZero(); |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		default: |  | ||||||
| 		{ |  | ||||||
| 			B3_ASSERT(false); |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void b3Filter(b3Vec3* out, |  | ||||||
| 	const b3Vec3* v, u32 massCount, const b3MassType* types, const b3MassContact* contacts) |  | ||||||
| { |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		switch (types[i]) |  | ||||||
| 		{ |  | ||||||
| 		case e_staticMass: |  | ||||||
| 		{ |  | ||||||
| 			out[i].SetZero(); |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		case e_dynamicMass: |  | ||||||
| 		{ |  | ||||||
| 			if (contacts[i].lockOnSurface) |  | ||||||
| 			{ |  | ||||||
| 				// Ensure the prohibited direction points to the solid. |  | ||||||
| 				b3Vec3 n = contacts[i].n; |  | ||||||
| 				b3Mat33 S = b3Mat33_identity - b3Outer(n, n); |  | ||||||
|  |  | ||||||
| 				out[i] = S * v[i]; |  | ||||||
| 				break; |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			out[i] = v[i]; |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		default: |  | ||||||
| 		{ |  | ||||||
| 			B3_ASSERT(false); |  | ||||||
| 			break; |  | ||||||
| 		} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void b3SpringSolver::InitializeSpringForces() |  | ||||||
| { | { | ||||||
| 	// Zero Jacobians | 	// Zero Jacobians | ||||||
| 	b3SetZero_Jacobian(m_Jx, m_springCount); | 	b3SetZero_Jacobian(m_Jx, m_springCount); | ||||||
| @@ -356,7 +211,7 @@ void b3SpringSolver::InitializeSpringForces() | |||||||
|  |  | ||||||
| 		float32 L3 = L * L * L; | 		float32 L3 = L * L * L; | ||||||
|  |  | ||||||
| 		b3Mat33 Jx11 = -ks * ( (1.0f - L0 / L) * I + (L0 / L3) * b3Outer(dx, dx) ); | 		b3Mat33 Jx11 = -ks * ((1.0f - L0 / L) * I + (L0 / L3) * b3Outer(dx, dx)); | ||||||
|  |  | ||||||
| 		m_Jx[i] = Jx11; | 		m_Jx[i] = Jx11; | ||||||
|  |  | ||||||
| @@ -375,8 +230,120 @@ void b3SpringSolver::InitializeSpringForces() | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| void b3SpringSolver::Compute_b(b3Vec3* b) const | static B3_FORCE_INLINE bool b3IsZero(const b3Mat33& A) | ||||||
| { | { | ||||||
|  | 	bool isZeroX = b3Dot(A.x, A.x) <= B3_EPSILON * B3_EPSILON; | ||||||
|  | 	bool isZeroY = b3Dot(A.y, A.y) <= B3_EPSILON * B3_EPSILON; | ||||||
|  | 	bool isZeroZ = b3Dot(A.z, A.z) <= B3_EPSILON * B3_EPSILON; | ||||||
|  |  | ||||||
|  | 	return isZeroX * isZeroY * isZeroZ; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void b3SpringSolver::Compute_A_b(b3SparseMat33& SA, b3DenseVec3& b) const | ||||||
|  | { | ||||||
|  | 	// Compute dfdx, dfdv | ||||||
|  | 	b3Mat33* dfdx = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33)); | ||||||
|  | 	b3SetZero(dfdx, m_massCount); | ||||||
|  |  | ||||||
|  | 	b3Mat33* dfdv = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33)); | ||||||
|  | 	b3SetZero(dfdv, m_massCount); | ||||||
|  |  | ||||||
|  | 	for (u32 i = 0; i < m_springCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		const b3Spring* S = m_springs + i; | ||||||
|  | 		u32 i1 = S->i1; | ||||||
|  | 		u32 i2 = S->i2; | ||||||
|  |  | ||||||
|  | 		b3Mat33 Jx11 = m_Jx[i]; | ||||||
|  | 		b3Mat33 Jx12 = -Jx11; | ||||||
|  | 		b3Mat33 Jx21 = Jx12; | ||||||
|  | 		b3Mat33 Jx22 = Jx11; | ||||||
|  |  | ||||||
|  | 		dfdx[B3_INDEX(i1, i1, m_massCount)] += Jx11; | ||||||
|  | 		dfdx[B3_INDEX(i1, i2, m_massCount)] += Jx12; | ||||||
|  | 		dfdx[B3_INDEX(i2, i1, m_massCount)] += Jx21; | ||||||
|  | 		dfdx[B3_INDEX(i2, i2, m_massCount)] += Jx22; | ||||||
|  |  | ||||||
|  | 		b3Mat33 Jv11 = m_Jv[i]; | ||||||
|  | 		b3Mat33 Jv12 = -Jv11; | ||||||
|  | 		b3Mat33 Jv21 = Jv12; | ||||||
|  | 		b3Mat33 Jv22 = Jv11; | ||||||
|  |  | ||||||
|  | 		dfdv[B3_INDEX(i1, i1, m_massCount)] += Jv11; | ||||||
|  | 		dfdv[B3_INDEX(i1, i2, m_massCount)] += Jv12; | ||||||
|  | 		dfdv[B3_INDEX(i2, i1, m_massCount)] += Jv21; | ||||||
|  | 		dfdv[B3_INDEX(i2, i2, m_massCount)] += Jv22; | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	// Compute A | ||||||
|  | 	// A = M - h * dfdv - h * h * dfdx | ||||||
|  |  | ||||||
|  | 	// A = 0 | ||||||
|  | 	b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33)); | ||||||
|  | 	b3SetZero(A, m_massCount); | ||||||
|  |  | ||||||
|  | 	// A += M | ||||||
|  | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		A[B3_INDEX(i, i, m_massCount)] += b3Diagonal(m_m[i]); | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// A += - h * dfdv - h * h * dfdx | ||||||
|  | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		for (u32 j = 0; j < m_massCount; ++j) | ||||||
|  | 		{ | ||||||
|  | 			A[B3_INDEX(i, j, m_massCount)] += (-m_h * dfdv[B3_INDEX(i, j, m_massCount)]) + (-m_h * m_h * dfdx[B3_INDEX(i, j, m_massCount)]); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	// Assembly sparsity | ||||||
|  | 	u32 nzCount = 0; | ||||||
|  |  | ||||||
|  | #if 0 | ||||||
|  | 	for (u32 i = 0; i < m_massCount * m_massCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		b3Mat33 a = A[i]; | ||||||
|  | 		if (b3IsZero(a) == false) | ||||||
|  | 		{ | ||||||
|  | 			++nzCount; | ||||||
|  | 		} | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | 	SA.row_ptrs[0] = 0; | ||||||
|  |  | ||||||
|  | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		u32 rowNzCount = 0; | ||||||
|  |  | ||||||
|  | 		for (u32 j = 0; j < m_massCount; ++j) | ||||||
|  | 		{ | ||||||
|  | 			b3Mat33 a = A[B3_INDEX(i, j, m_massCount)]; | ||||||
|  |  | ||||||
|  | 			if (b3IsZero(a) == false) | ||||||
|  | 			{ | ||||||
|  | 				B3_ASSERT(nzCount <= SA.valueCount); | ||||||
|  |  | ||||||
|  | 				SA.values[nzCount] = a; | ||||||
|  | 				SA.cols[nzCount] = j; | ||||||
|  |  | ||||||
|  | 				++nzCount; | ||||||
|  | 				++rowNzCount; | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		SA.row_ptrs[i + 1] = SA.row_ptrs[(i + 1) - 1] + rowNzCount; | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	B3_ASSERT(nzCount <= SA.valueCount); | ||||||
|  | 	SA.valueCount = nzCount; | ||||||
|  |  | ||||||
|  | 	m_allocator->Free(A); | ||||||
|  |  | ||||||
|  | 	// Compute b | ||||||
|  | 	// b = h * (f0 + h * Jx_v + Jx_y ) | ||||||
|  | 	 | ||||||
| 	// Jx_v = dfdx * v | 	// Jx_v = dfdx * v | ||||||
| 	b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3Vec3* Jx_v = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | ||||||
| 	b3Mul_Jacobian(Jx_v, m_v, m_massCount, m_Jx, m_springs, m_springCount); | 	b3Mul_Jacobian(Jx_v, m_v, m_massCount, m_Jx, m_springs, m_springCount); | ||||||
| @@ -385,7 +352,6 @@ void b3SpringSolver::Compute_b(b3Vec3* b) const | |||||||
| 	b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3Vec3* Jx_y = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | ||||||
| 	b3Mul_Jacobian(Jx_y, m_y, m_massCount, m_Jx, m_springs, m_springCount); | 	b3Mul_Jacobian(Jx_y, m_y, m_massCount, m_Jx, m_springs, m_springCount); | ||||||
|  |  | ||||||
| 	// b = h * (f0 + h * Jx_v + Jx_y ) |  | ||||||
| 	for (u32 i = 0; i < m_massCount; ++i) | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		b[i] = m_h * (m_f[i] + m_h * Jx_v[i] + Jx_y[i]); | 		b[i] = m_h * (m_f[i] + m_h * Jx_v[i] + Jx_y[i]); | ||||||
| @@ -393,184 +359,198 @@ void b3SpringSolver::Compute_b(b3Vec3* b) const | |||||||
|  |  | ||||||
| 	m_allocator->Free(Jx_y); | 	m_allocator->Free(Jx_y); | ||||||
| 	m_allocator->Free(Jx_v); | 	m_allocator->Free(Jx_v); | ||||||
|  |  | ||||||
|  | 	m_allocator->Free(dfdv); | ||||||
|  | 	m_allocator->Free(dfdx); | ||||||
| } | } | ||||||
|  |  | ||||||
| void b3SpringSolver::Solve_MCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3Vec3* b) const | // This outputs the desired acceleration of the masses in the constrained  | ||||||
|  | // directions. | ||||||
|  | static void b3Compute_z(b3DenseVec3& out, | ||||||
|  | 	u32 massCount, const b3MassType* types, const b3MassContact* contacts) | ||||||
|  | { | ||||||
|  | 	out.SetZero(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Maintains invariants inside the MCG solver. | ||||||
|  | static void b3Filter(b3DenseVec3& out, | ||||||
|  | 	const b3DenseVec3& v, u32 massCount, const b3MassType* types, const b3MassContact* contacts) | ||||||
|  | { | ||||||
|  | 	for (u32 i = 0; i < massCount; ++i) | ||||||
|  | 	{ | ||||||
|  | 		switch (types[i]) | ||||||
|  | 		{ | ||||||
|  | 		case e_staticMass: | ||||||
|  | 		{ | ||||||
|  | 			out[i].SetZero(); | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 		case e_dynamicMass: | ||||||
|  | 		{ | ||||||
|  | 			if (contacts[i].lockN == true) | ||||||
|  | 			{ | ||||||
|  | 				b3Vec3 n = contacts[i].n; | ||||||
|  |  | ||||||
|  | 				b3Mat33 S = b3Mat33_identity - b3Outer(n, n); | ||||||
|  |  | ||||||
|  | 				if (contacts[i].lockT1 == true) | ||||||
|  | 				{ | ||||||
|  | 					b3Vec3 t1 = contacts[i].t1; | ||||||
|  |  | ||||||
|  | 					S -= b3Outer(t1, t1); | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				if (contacts[i].lockT2 == true) | ||||||
|  | 				{ | ||||||
|  | 					b3Vec3 t2 = contacts[i].t2; | ||||||
|  |  | ||||||
|  | 					S -= b3Outer(t2, t2); | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				out[i] = S * v[i]; | ||||||
|  | 				break; | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			out[i] = v[i]; | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 		default: | ||||||
|  | 		{ | ||||||
|  | 			B3_ASSERT(false); | ||||||
|  | 			break; | ||||||
|  | 		} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void b3SpringSolver::Solve_MCG(b3DenseVec3& dv, const b3SparseMat33& A, b3DenseVec3& e, u32& iterations, const b3DenseVec3& b) const | ||||||
| { | { | ||||||
| 	// dv = z | 	// dv = z | ||||||
| 	b3Compute_z(dv, m_massCount, m_types, m_contacts); | 	b3Compute_z(dv, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 	// r = filter(b - Adv) |  | ||||||
| 	b3Vec3* r = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); |  | ||||||
|  |  | ||||||
| 	// Adv = A * dv  | 	// Adv = A * dv  | ||||||
| 	b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 Adv(m_massCount); | ||||||
|  | 	A.Mul(Adv, dv); | ||||||
|  |  | ||||||
| 	b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); | 	// r = filter(b - Adv) | ||||||
| 	b3Sub(r, b, Adv, m_massCount); | 	b3DenseVec3 r(m_massCount); | ||||||
|  | 	b3Sub(r, b, Adv); | ||||||
| 	b3Filter(r, r, m_massCount, m_types, m_contacts); | 	b3Filter(r, r, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 	m_allocator->Free(Adv); |  | ||||||
|  |  | ||||||
| 	// c = r | 	// c = r | ||||||
| 	b3Vec3* c = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 c = r; | ||||||
| 	b3Copy(c, r, m_massCount); |  | ||||||
|  |  | ||||||
| 	// eps0 = dot(f, f) | 	// eps0 = dot(r, r) | ||||||
| 	float32 eps0 = b3Dot(r, r, m_massCount); | 	float32 eps0 = b3Dot(r, r); | ||||||
|  |  | ||||||
| 	// epsNew = dot(r, r) | 	// epsNew = dot(r, r) | ||||||
| 	float32 epsNew = eps0; | 	float32 epsNew = eps0; | ||||||
|  |  | ||||||
| 	// [0, 1] | 	// [0, 1] | ||||||
| 	const float32 kTol = 0.25f; | 	const float32 kTol = 10.0f * B3_EPSILON; | ||||||
|  |  | ||||||
| 	// Limit number of iterations to prevent cycling. | 	// Limit number of iterations to prevent cycling. | ||||||
| 	const u32 kMaxIters = 100; | 	const u32 kMaxIters = 10 * 10; | ||||||
|  |  | ||||||
| 	// Main iteration loop. | 	// Main iteration loop. | ||||||
| 	u32 iter = 0; | 	u32 iter = 0; | ||||||
| 	while (iter < kMaxIters && epsNew > kTol * kTol * eps0) | 	while (iter < kMaxIters && epsNew > kTol * kTol * eps0) | ||||||
| 	{ | 	{ | ||||||
| 		// q = filter(A * c)  | 		// q = filter(A * c)  | ||||||
| 		b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 		b3DenseVec3 q(m_massCount); | ||||||
| 		b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); | 		A.Mul(q, c); | ||||||
| 		b3Filter(q, q, m_massCount, m_types, m_contacts); | 		b3Filter(q, q, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 		// alpha = epsNew / dot(c, q) | 		// alpha = epsNew / dot(c, q) | ||||||
| 		float32 alpha_den = b3Dot(c, q, m_massCount); | 		float32 alpha_den = b3Dot(c, q); | ||||||
| 		float32 alpha = epsNew / alpha_den; | 		float32 alpha = epsNew / alpha_den; | ||||||
|  |  | ||||||
| 		// dv = dv + alpha * c | 		// dv = dv + alpha * c | ||||||
| 		for (u32 i = 0; i < m_massCount; ++i) | 		b3DenseVec3 alpha_c(m_massCount); | ||||||
| 		{ | 		b3Mul(alpha_c, alpha, c); | ||||||
| 			dv[i] = dv[i] + alpha * c[i]; | 		b3Add(dv, dv, alpha_c); | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// r = r - alpha * q  | 		// r = r - alpha * q  | ||||||
| 		for (u32 i = 0; i < m_massCount; ++i) | 		b3DenseVec3 alpha_q(m_massCount); | ||||||
| 		{ | 		b3Mul(alpha_q, alpha, q); | ||||||
| 			r[i] = r[i] - alpha * q[i]; | 		b3Sub(r, r, alpha_q); | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		m_allocator->Free(q); |  | ||||||
|  |  | ||||||
| 		// epsOld = epsNew | 		// epsOld = epsNew | ||||||
| 		float32 epsOld = epsNew; | 		float32 epsOld = epsNew; | ||||||
|  |  | ||||||
| 		// epsNew = dot(r, r)  | 		// epsNew = dot(r, r)  | ||||||
| 		epsNew = b3Dot(r, r, m_massCount); | 		epsNew = b3Dot(r, r); | ||||||
|  |  | ||||||
| 		float32 beta = epsNew / epsOld; | 		float32 beta = epsNew / epsOld; | ||||||
|  |  | ||||||
| 		// c = filter(r + beta * c)  | 		// c = filter(r + beta * c)  | ||||||
| 		for (u32 i = 0; i < m_massCount; ++i) | 		b3DenseVec3 beta_c(m_massCount); | ||||||
| 		{ | 		b3Mul(beta_c, beta, c); | ||||||
| 			c[i] = r[i] + beta * c[i]; | 		b3Add(c, r, beta_c); | ||||||
| 		} |  | ||||||
| 		b3Filter(c, c, m_massCount, m_types, m_contacts); | 		b3Filter(c, c, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 		++iter; | 		++iter; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	m_allocator->Free(c); |  | ||||||
| 	m_allocator->Free(r); |  | ||||||
|  |  | ||||||
| 	iterations = iter; | 	iterations = iter; | ||||||
|  |  | ||||||
| 	// f = A * dv - b | 	// f = A * dv - b | ||||||
| 	b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); | 	A.Mul(e, dv); | ||||||
| 	b3Sub(e, e, b, m_massCount); | 	b3Sub(e, e, b); | ||||||
| } | } | ||||||
|  |  | ||||||
| static void b3Make_A(b3Mat33* A, | // Sylvester's Criterion | ||||||
| 	const b3Mat33* Jx, const b3Mat33* Jv, u32 massCount, | static bool b3IsPD(const b3Mat33* diagA, u32 n) | ||||||
| 	b3StackAllocator* allocator, float32 h, float32* m, |  | ||||||
| 	const b3Spring* springs, u32 springCount) |  | ||||||
| { | { | ||||||
| 	// A = M - h * dfdv - h * h * dfdx | 	// Loop over the principal elements | ||||||
|  | 	for (u32 i = 0; i < n; ++i) | ||||||
| 	// A = 0 |  | ||||||
| 	b3SetZero(A, massCount); |  | ||||||
|  |  | ||||||
| 	// Compute dfdx, dfdv |  | ||||||
| 	b3Mat33* dfdx = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33)); |  | ||||||
| 	b3SetZero(dfdx, massCount); |  | ||||||
|  |  | ||||||
| 	b3Mat33* dfdv = (b3Mat33*)allocator->Allocate(massCount * massCount * sizeof(b3Mat33)); |  | ||||||
| 	b3SetZero(dfdv, massCount); |  | ||||||
|  |  | ||||||
| 	for (u32 i = 0; i < springCount; ++i) |  | ||||||
| 	{ | 	{ | ||||||
| 		const b3Spring* S = springs + i; | 		b3Mat33 a = diagA[i]; | ||||||
| 		u32 i1 = S->i1; |  | ||||||
| 		u32 i2 = S->i2; |  | ||||||
| 		 | 		 | ||||||
| 		b3Mat33 Jx11 = Jx[i]; | 		float32 D = b3Det(a.x, a.y, a.z); | ||||||
| 		b3Mat33 Jx12 = -Jx11; |  | ||||||
| 		b3Mat33 Jx21 = Jx12; |  | ||||||
| 		b3Mat33 Jx22 = Jx11; |  | ||||||
|  |  | ||||||
| 		dfdx[B3_INDEX(i1, i1, massCount)] += Jx11; | 		const float32 kTol = 0.0f; | ||||||
| 		dfdx[B3_INDEX(i1, i2, massCount)] += Jx12; |  | ||||||
| 		dfdx[B3_INDEX(i2, i1, massCount)] += Jx21; |  | ||||||
| 		dfdx[B3_INDEX(i2, i2, massCount)] += Jx22; |  | ||||||
|  |  | ||||||
| 		b3Mat33 Jv11 = Jv[i]; | 		if (D <= kTol) | ||||||
| 		b3Mat33 Jv12 = -Jv11; |  | ||||||
| 		b3Mat33 Jv21 = Jv12; |  | ||||||
| 		b3Mat33 Jv22 = Jv11; |  | ||||||
|  |  | ||||||
| 		dfdv[B3_INDEX(i1, i1, massCount)] += Jv11; |  | ||||||
| 		dfdv[B3_INDEX(i1, i2, massCount)] += Jv12; |  | ||||||
| 		dfdv[B3_INDEX(i2, i1, massCount)] += Jv21; |  | ||||||
| 		dfdv[B3_INDEX(i2, i2, massCount)] += Jv22; |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// A += M |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 		{ | 		{ | ||||||
| 		A[B3_INDEX(i, i, massCount)] += b3Diagonal(m[i]); | 			return false; | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// A += - h * dfdv - h * h * dfdx |  | ||||||
| 	for (u32 i = 0; i < massCount; ++i) |  | ||||||
| 	{ |  | ||||||
| 		for (u32 j = 0; j < massCount; ++j) |  | ||||||
| 		{ |  | ||||||
| 			A[B3_INDEX(i, j, massCount)] += (-h * dfdv[B3_INDEX(i, j, massCount)]) + (-h * h * dfdx[B3_INDEX(i, j, massCount)]); |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	allocator->Free(dfdv); | 	return true; | ||||||
| 	allocator->Free(dfdx); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3Vec3* b) const | void b3SpringSolver::Solve_MPCG(b3DenseVec3& dv, const b3SparseMat33& A, b3DenseVec3& e, u32& iterations, const b3DenseVec3& b) const | ||||||
| { | { | ||||||
| 	b3Vec3* r = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 r(m_massCount); | ||||||
| 	 | 	 | ||||||
| 	b3Vec3* c = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 c(m_massCount); | ||||||
| 	 | 	 | ||||||
| 	b3Vec3* s = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 s(m_massCount); | ||||||
|  |  | ||||||
| 	b3Vec3* inv_P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 inv_P(m_massCount); | ||||||
|  |  | ||||||
| 	// dv = z | 	// dv = z | ||||||
| 	b3Compute_z(dv, m_massCount, m_types, m_contacts); | 	b3Compute_z(dv, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 	// P = diag(A)^-1 | 	// P = diag(A)^-1 | ||||||
| 	b3Vec3* P = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 P(m_massCount); | ||||||
|  |  | ||||||
| 	// A = M - h * dfdv - h * h * dfdx | 	// diag(A) | ||||||
| 	b3Mat33* A = (b3Mat33*)m_allocator->Allocate(m_massCount * m_massCount * sizeof(b3Mat33)); | 	b3Mat33* diagA = (b3Mat33*)m_allocator->Allocate(m_massCount * sizeof(b3Mat33)); | ||||||
| 	b3Make_A(A, m_Jx, m_Jv, m_massCount, m_allocator, m_h, m_m, m_springs, m_springCount); | 	A.AssembleDiagonal(diagA); | ||||||
|  |  | ||||||
| 	// Compute P, P^-1 | 	// Compute P, P^-1 | ||||||
| 	// @todo Optimize so we don't need to compute A. | 	bool isPD = true; | ||||||
|  |  | ||||||
| 	for (u32 i = 0; i < m_massCount; ++i) | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		b3Mat33 D = A[B3_INDEX(i, i, m_massCount)]; | 		b3Mat33 D = diagA[i]; | ||||||
|  |  | ||||||
|  | 		if (b3Det(D.x, D.y, D.z) <= 3.0f * B3_EPSILON) | ||||||
|  | 		{ | ||||||
|  | 			isPD = false; | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		B3_ASSERT(D[0][0] != 0.0f); | 		B3_ASSERT(D[0][0] != 0.0f); | ||||||
| 		B3_ASSERT(D[1][1] != 0.0f); | 		B3_ASSERT(D[1][1] != 0.0f); | ||||||
| @@ -580,13 +560,13 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 		inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]); | 		inv_P[i] = b3Vec3(D[0][0], D[1][1], D[2][2]); | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	m_allocator->Free(A); | 	m_allocator->Free(diagA); | ||||||
| 	 | 	 | ||||||
| 	// eps0 = dot( filter(b), P * filter(b) ) | 	// eps0 = dot( filter(b), P * filter(b) ) | ||||||
| 	b3Vec3* filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 filter_b(m_massCount); | ||||||
| 	b3Filter(filter_b, b, m_massCount, m_types, m_contacts); | 	b3Filter(filter_b, b, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 	b3Vec3* P_filter_b = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 P_filter_b(m_massCount); | ||||||
| 	for (u32 i = 0; i < m_massCount; ++i) | 	for (u32 i = 0; i < m_massCount; ++i) | ||||||
| 	{ | 	{ | ||||||
| 		P_filter_b[i][0] = P[i][0] * filter_b[i][0]; | 		P_filter_b[i][0] = P[i][0] * filter_b[i][0]; | ||||||
| @@ -594,22 +574,14 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 		P_filter_b[i][2] = P[i][2] * filter_b[i][2]; | 		P_filter_b[i][2] = P[i][2] * filter_b[i][2]; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	float32 eps0 = b3Dot(filter_b, P_filter_b, m_massCount); | 	float32 eps0 = b3Dot(filter_b, P_filter_b); | ||||||
|  |  | ||||||
| 	m_allocator->Free(P_filter_b); |  | ||||||
| 	m_allocator->Free(filter_b); |  | ||||||
| 	m_allocator->Free(P); |  | ||||||
| 	 | 	 | ||||||
| 	// r = filter(b - Adv) | 	// r = filter(b - Adv) | ||||||
|  |  | ||||||
| 	// Adv = A * dv | 	// Adv = A * dv | ||||||
| 	b3Vec3* Adv = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 	b3DenseVec3 Adv(m_massCount); | ||||||
| 	b3Mul_A(Adv, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); | 	A.Mul(Adv, dv); | ||||||
|  | 	b3Sub(r, b, Adv); | ||||||
| 	b3Sub(r, b, Adv, m_massCount); |  | ||||||
|  |  | ||||||
| 	m_allocator->Free(Adv); |  | ||||||
|  |  | ||||||
| 	b3Filter(r, r, m_massCount, m_types, m_contacts); | 	b3Filter(r, r, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 	// c = filter(P^-1 * r) | 	// c = filter(P^-1 * r) | ||||||
| @@ -622,26 +594,25 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 	b3Filter(c, c, m_massCount, m_types, m_contacts); | 	b3Filter(c, c, m_massCount, m_types, m_contacts); | ||||||
| 	 | 	 | ||||||
| 	// epsNew = dot(r, c) | 	// epsNew = dot(r, c) | ||||||
| 	float32 epsNew = b3Dot(r, c, m_massCount); | 	float32 epsNew = b3Dot(r, c); | ||||||
|  |  | ||||||
| 	// [0, 1] | 	// [0, 1] | ||||||
| 	const float32 kTol = 0.25f; | 	const float32 kTol = 10.0f * B3_EPSILON; | ||||||
|  |  | ||||||
| 	// Limit number of iterations to prevent cycling. | 	// Limit number of iterations to prevent cycling. | ||||||
| 	const u32 kMaxIters = 100; | 	const u32 kMaxIters = 10 * 10; | ||||||
|  |  | ||||||
| 	// Main iteration loop. | 	// Main iteration loop. | ||||||
| 	u32 iter = 0; | 	u32 iter = 0; | ||||||
| 	while (iter < kMaxIters && epsNew > kTol * kTol * eps0) | 	while (iter < kMaxIters && epsNew > kTol * kTol * eps0) | ||||||
| 	{ | 	{ | ||||||
| 		// q = filter(A * c) | 		// q = filter(A * c) | ||||||
| 		b3Vec3* q = (b3Vec3*)m_allocator->Allocate(m_massCount * sizeof(b3Vec3)); | 		b3DenseVec3 q(m_massCount); | ||||||
|  | 		A.Mul(q, c); | ||||||
| 		b3Mul_A(q, c, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); |  | ||||||
| 		b3Filter(q, q, m_massCount, m_types, m_contacts); | 		b3Filter(q, q, m_massCount, m_types, m_contacts); | ||||||
|  |  | ||||||
| 		// alpha = epsNew / dot(c, q) | 		// alpha = epsNew / dot(c, q) | ||||||
| 		float32 alpha = epsNew / b3Dot(c, q, m_massCount); | 		float32 alpha = epsNew / b3Dot(c, q); | ||||||
|  |  | ||||||
| 		// x = x + alpha * c | 		// x = x + alpha * c | ||||||
| 		for (u32 i = 0; i < m_massCount; ++i) | 		for (u32 i = 0; i < m_massCount; ++i) | ||||||
| @@ -655,8 +626,6 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 			r[i] = r[i] - alpha * q[i]; | 			r[i] = r[i] - alpha * q[i]; | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		m_allocator->Free(q); |  | ||||||
|  |  | ||||||
| 		// s = inv_P * r | 		// s = inv_P * r | ||||||
| 		for (u32 i = 0; i < m_massCount; ++i) | 		for (u32 i = 0; i < m_massCount; ++i) | ||||||
| 		{ | 		{ | ||||||
| @@ -669,7 +638,7 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 		float32 epsOld = epsNew; | 		float32 epsOld = epsNew; | ||||||
|  |  | ||||||
| 		// epsNew = dot(r, s) | 		// epsNew = dot(r, s) | ||||||
| 		epsNew = b3Dot(r, s, m_massCount); | 		epsNew = b3Dot(r, s); | ||||||
|  |  | ||||||
| 		// beta = epsNew / epsOld | 		// beta = epsNew / epsOld | ||||||
| 		float32 beta = epsNew / epsOld; | 		float32 beta = epsNew / epsOld; | ||||||
| @@ -684,15 +653,10 @@ void b3SpringSolver::Solve_MPCG(b3Vec3* dv, b3Vec3* e, u32& iterations, const b3 | |||||||
| 		++iter; | 		++iter; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	m_allocator->Free(inv_P); |  | ||||||
| 	m_allocator->Free(s); |  | ||||||
| 	m_allocator->Free(c); |  | ||||||
| 	m_allocator->Free(r); |  | ||||||
|  |  | ||||||
| 	iterations = iter; | 	iterations = iter; | ||||||
|  |  | ||||||
| 	// Residual error | 	// Residual error | ||||||
| 	// f = A * x - b | 	// f = A * x - b | ||||||
| 	b3Mul_A(e, dv, m_massCount, m_allocator, m_m, m_h, m_Jx, m_Jv, m_springs, m_springCount); | 	A.Mul(e, dv); | ||||||
| 	b3Sub(e, e, b, m_massCount); | 	b3Sub(e, e, b); | ||||||
| } | } | ||||||
		Reference in New Issue
	
	Block a user