102 #include <emmintrin.h>
// Broadcasts lane `e` of the __m128 value `x` into all four lanes of the result
// (both shuffle operands are `x`, and every selector in _MM_SHUFFLE is `e`).
104 #define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e))
// Three-component dot product of two SSE vectors.
// Returns a __m128 in which every lane holds
// vec0[0]*vec1[0] + vec0[1]*vec1[1] + vec0[2]*vec1[2]; lane 3 of the inputs does not contribute.
105 static inline __m128 btSimdDot3(__m128 vec0, __m128 vec1)
// Lane-wise product of the two inputs.
107 __m128 result = _mm_mul_ps(vec0, vec1);
// Splat product lanes 0, 1 and 2 separately and add them, so the sum appears in all four lanes.
108 return _mm_add_ps(btVecSplat(result, 0), _mm_add_ps(btVecSplat(result, 1), btVecSplat(result, 2)));
// Configuration that is only compiled when BT_ALLOW_SSE4 is defined.
111 #if defined(BT_ALLOW_SSE4)
// Set to 1 so that the `#if USE_FMA3_INSTEAD_FMA4` test further down is true.
115 #define USE_FMA3_INSTEAD_FMA4 1
// NOTE(review): presumably this switch picks the SSE4 definition of DOT_PRODUCT below;
// the conditional that tests it is not visible in this excerpt - confirm.
116 #define USE_SSE4_DOT 1
// _mm_dp_ps with immediate 0x7f: the high nibble (0x7) multiplies and sums lanes 0..2 only,
// the low nibble (0xf) writes that sum to all four lanes of the result.
118 #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f)
// Same three-lane dot product, with lane 0 of the result extracted as a scalar float.
119 #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f))
// Two alternative definitions of DOT_PRODUCT follow: the SSE4 _mm_dp_ps form and the
// btSimdDot3 form. The preprocessor lines that choose between them are not part of
// this excerpt.
122 #define DOT_PRODUCT(a, b) SSE4_DP(a, b)
124 #define DOT_PRODUCT(a, b) btSimdDot3(a, b)
// FMADD(a, b, c)  evaluates to  a * b + c   (lane-wise on __m128).
// FMNADD(a, b, c) evaluates to  c - a * b   (lane-wise on __m128).
// Three alternative spellings are listed; only the `#if USE_FMA3_INSTEAD_FMA4` test is
// visible here, the remaining #else/#endif lines are not part of this excerpt.
128 #if USE_FMA3_INSTEAD_FMA4
// FMA3 intrinsics.
130 #define FMADD(a, b, c) _mm_fmadd_ps(a, b, c)
132 #define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c)
// FMA4 intrinsics (_mm_macc_ps / _mm_nmacc_ps).
135 #define FMADD(a, b, c) _mm_macc_ps(a, b, c)
137 #define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c)
// Plain SSE spelling: a separate multiply followed by an add or subtract.
141 #define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b))
143 #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
156 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
157 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
160 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
161 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
162 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
163 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
164 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
165 __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
166 deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
170 __m128 impulseMagnitude = deltaImpulse;
181 #if defined(BT_ALLOW_SSE4)
188 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
189 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
191 const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
192 const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
193 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.
m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
194 c.
m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
202 return gResolveSingleConstraintRowGeneric_sse2(bodyA, bodyB, c);
214 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
215 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
218 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
219 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
220 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
221 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
222 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
225 __m128 impulseMagnitude = deltaImpulse;
242 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
243 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
245 const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
246 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), deltaImpulse, mask);
255 return gResolveSingleConstraintRowLowerLimit_sse2(bodyA, bodyB, c);
256 #endif //BT_ALLOW_SSE4
328 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
329 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
332 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
333 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
334 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
335 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
336 c.
m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
339 __m128 impulseMagnitude = deltaImpulse;
378 #endif //BT_ALLOW_SSE4
400 return gResolveSingleConstraintRowGeneric_sse2;
404 return gResolveSingleConstraintRowLowerLimit_sse2;
409 return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
413 return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
415 #endif //BT_ALLOW_SSE4
428 const unsigned long un = static_cast<unsigned long>(n);
433 if (un <= 0x00010000UL)
436 if (un <= 0x00000100UL)
439 if (un <= 0x00000010UL)
442 if (un <= 0x00000004UL)
445 if (un <= 0x00000002UL)
454 return (
int)(r % un);
495 if (
btFabs(rel_vel) < velocityThreshold)
498 btScalar rest = restitution * -rel_vel;
510 loc_lateral *= friction_scaling;
516 void btSequentialImpulseConstraintSolver::setupFrictionConstraint(
btSolverConstraint& solverConstraint,
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
575 btScalar denom = relaxation / (denom0 + denom1);
584 rel_vel = vel1Dotn + vel2Dotn;
588 btScalar velocityError = desiredVelocity - rel_vel;
597 penetrationImpulse = positionalError * solverConstraint.
m_jacDiagABInv;
600 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
602 solverConstraint.
m_cfm = cfmSlip;
608 btSolverConstraint&
btSequentialImpulseConstraintSolver::addFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
613 colObj0, colObj1, relaxation, infoGlobal, desiredVelocity, cfmSlip);
614 return solverConstraint;
636 solverConstraint.
m_friction = combinedTorsionalFriction;
667 rel_vel = vel1Dotn + vel2Dotn;
673 solverConstraint.
m_rhs = velocityImpulse;
674 solverConstraint.
m_cfm = cfmSlip;
680 btSolverConstraint&
btSequentialImpulseConstraintSolver::addTorsionalFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
btScalar combinedTorsionalFriction,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
btScalar desiredVelocity,
btScalar cfmSlip)
685 colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
686 return solverConstraint;
692 int solverBodyId = -1;
699 if (solverBodyId < 0)
717 const int INVALID_SOLVER_BODY_ID = -1;
724 if (solverBodyId == INVALID_SOLVER_BODY_ID)
737 if (!isMultiBodyType)
753 #else // BT_THREADSAFE
755 int solverBodyIdA = -1;
787 return solverBodyIdA;
788 #endif // BT_THREADSAFE
793 int solverBodyIdA,
int solverBodyIdB,
812 relaxation = infoGlobal.
m_sor;
850 #ifdef COMPUTE_IMPULSE_DENOM
867 #endif //COMPUTE_IMPULSE_DENOM
869 btScalar denom = relaxation / (denom0 + denom1 + cfm);
940 btScalar rel_vel = vel1Dotn + vel2Dotn;
943 btScalar velocityError = restitution - rel_vel;
949 velocityError -= penetration * invTimeStep;
953 positionalError = -penetration * erp * invTimeStep;
962 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
968 solverConstraint.
m_rhs = velocityImpulse;
978 int solverBodyIdA,
int solverBodyIdB,
1041 int rollingFriction = 1;
1063 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
1074 setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, relaxation, rel_pos1, rel_pos2);
1083 addTorsionalFrictionConstraint(cp.
m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, cp.
m_combinedSpinningFriction, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
1093 if (axis0.
length() > 0.001)
1096 if (axis1.
length() > 0.001)
1127 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1135 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1144 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1150 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1161 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion1, cp.
m_frictionCFM);
1164 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion2, cp.
m_frictionCFM);
1177 for (i = 0; i < numManifolds; i++)
1179 manifold = manifoldPtr[i];
1233 info2.erp = infoGlobal.
m_erp;
// Store in info2 the addresses of the current constraint row's fields:
// its right-hand side, CFM, and lower/upper limit members are passed as pointers.
// NOTE(review): presumably the consumer of info2 writes these row fields in place
// through the pointers - that code is not visible here, confirm.
1241 info2.m_constraintError = &currentConstraintRow->m_rhs;
1244 info2.cfm = &currentConstraintRow->m_cfm;
1245 info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
1246 info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
1304 rel_vel = vel1Dotn + vel2Dotn;
1307 btScalar velocityError = restitution - rel_vel * info2.m_damping;
1310 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
1319 for (
int j = 0; j < numConstraints; j++)
1326 int totalNumRows = 0;
1330 for (
int i = 0; i < numConstraints; i++)
1342 if (constraints[i]->isEnabled())
1358 for (
int i = 0; i < numConstraints; i++)
1364 btAssert(currentRow < totalNumRows);
1374 convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
1383 for (
int i = 0; i < numBodies; i++)
1389 #endif // BT_THREADSAFE
1397 for (
int i = 0; i < numBodies; i++)
1441 #ifdef BT_ADDITIONAL_DEBUG
1443 for (
int i = 0; i < numConstraints; i++)
1451 for (
int b = 0; b < numBodies; b++)
1464 for (
int b = 0; b < numBodies; b++)
1477 for (
int i = 0; i < numManifolds; i++)
1479 if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
1482 for (
int b = 0; b < numBodies; b++)
1484 if (manifoldPtr[i]->getBody0() == bodies[b])
1492 if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
1495 for (
int b = 0; b < numBodies; b++)
1497 if (manifoldPtr[i]->getBody1() == bodies[b])
1506 #endif //BT_ADDITIONAL_DEBUG
1531 for (i = 0; i < numNonContactPool; i++)
1535 for (i = 0; i < numConstraintPool; i++)
1539 for (i = 0; i < numFrictionPool; i++)
1551 btScalar leastSquaresResidual = 0.f;
1561 for (
int j = 0; j < numNonContactPool; ++j)
1572 for (
int j = 0; j < numConstraintPool; ++j)
1580 for (
int j = 0; j < numFrictionPool; ++j)
1598 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1604 for (
int j = 0; j < numConstraints; j++)
1606 if (constraints[j]->isEnabled())
1622 for (
int c = 0; c < numPoolConstraints; c++)
1629 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1633 bool applyFriction =
true;
1645 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1659 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1671 for (j = 0; j < numPoolConstraints; j++)
1675 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1681 for (j = 0; j < numFrictionPoolConstraints; j++)
1692 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1698 for (
int j = 0; j < numRollingFrictionPoolConstraints; j++)
1704 btScalar rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction * totalImpulse;
1705 if (rollingFrictionMagnitude > rollingFrictionConstraint.
m_friction)
1706 rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction;
1708 rollingFrictionConstraint.
m_lowerLimit = -rollingFrictionMagnitude;
1709 rollingFrictionConstraint.
m_upperLimit = rollingFrictionMagnitude;
1712 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1716 return leastSquaresResidual;
1721 BT_PROFILE(
"solveGroupCacheFriendlySplitImpulseIterations");
1726 for (iteration = 0; iteration < infoGlobal.
m_numIterations; iteration++)
1728 btScalar leastSquaresResidual = 0.f;
1732 for (j = 0; j < numPoolConstraints; j++)
1737 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1740 if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.
m_numIterations - 1))
1742 #ifdef VERBOSE_RESIDUAL_PRINTF
1743 printf(
"residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
1754 BT_PROFILE(
"solveGroupCacheFriendlyIterations");
1762 for (
int iteration = 0; iteration <
maxIterations; iteration++)
1769 #ifdef VERBOSE_RESIDUAL_PRINTF
1781 for (
int j = iBegin; j < iEnd; j++)
1801 for (
int j = iBegin; j < iEnd; j++)
1824 for (
int i = iBegin; i < iEnd; i++)