102 #include <emmintrin.h>
104 #define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e))
// Three-lane dot product: multiplies vec0 and vec1 lane by lane, then sums the
// products from lanes 0, 1 and 2. The product in lane 3 is never added.
// Each operand of the sum is a btVecSplat broadcast, so all four lanes of the
// returned vector hold the same scalar result.
105 static inline __m128 btSimdDot3(__m128 vec0, __m128 vec1)
107 __m128 result = _mm_mul_ps(vec0, vec1);
// Broadcast lanes 0, 1 and 2 of the products and add the three broadcasts together.
108 return _mm_add_ps(btVecSplat(result, 0), _mm_add_ps(btVecSplat(result, 1), btVecSplat(result, 2)));
111 #if defined(BT_ALLOW_SSE4)
115 #define USE_FMA3_INSTEAD_FMA4 1
116 #define USE_SSE4_DOT 1
118 #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f)
119 #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f))
122 #define DOT_PRODUCT(a, b) SSE4_DP(a, b)
124 #define DOT_PRODUCT(a, b) btSimdDot3(a, b)
128 #if USE_FMA3_INSTEAD_FMA4
130 #define FMADD(a, b, c) _mm_fmadd_ps(a, b, c)
132 #define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c)
135 #define FMADD(a, b, c) _mm_macc_ps(a, b, c)
137 #define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c)
141 #define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b))
143 #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
156 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
157 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
160 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
161 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
162 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
163 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
164 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
165 __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
166 deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
170 __m128 impulseMagnitude = deltaImpulse;
181 #if defined(BT_ALLOW_SSE4)
188 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
189 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
191 const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
192 const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
193 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.
m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
194 c.
m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
202 return gResolveSingleConstraintRowGeneric_sse2(bodyA, bodyB, c);
214 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
215 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
218 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
219 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
220 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
221 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
222 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
225 __m128 impulseMagnitude = deltaImpulse;
242 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
243 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
245 const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
246 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), deltaImpulse, mask);
255 return gResolveSingleConstraintRowLowerLimit_sse2(bodyA, bodyB, c);
256 #endif //BT_ALLOW_SSE4
328 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
329 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
332 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
333 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
334 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
335 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
336 c.
m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
339 __m128 impulseMagnitude = deltaImpulse;
378 #endif //BT_ALLOW_SSE4
400 return gResolveSingleConstraintRowGeneric_sse2;
404 return gResolveSingleConstraintRowLowerLimit_sse2;
409 return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
413 return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
415 #endif //BT_ALLOW_SSE4
428 const unsigned long un = static_cast<unsigned long>(n);
433 if (un <= 0x00010000UL)
436 if (un <= 0x00000100UL)
439 if (un <= 0x00000010UL)
442 if (un <= 0x00000004UL)
445 if (un <= 0x00000002UL)
454 return (
int)(r % un);
495 if (
btFabs(rel_vel) < velocityThreshold)
498 btScalar rest = restitution * -rel_vel;
510 loc_lateral *= friction_scaling;
516 void btSequentialImpulseConstraintSolver::setupFrictionConstraint(
btSolverConstraint& solverConstraint,
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
575 btScalar denom = relaxation / (denom0 + denom1);
584 rel_vel = vel1Dotn + vel2Dotn;
588 btScalar velocityError = desiredVelocity - rel_vel;
597 penetrationImpulse = positionalError * solverConstraint.
m_jacDiagABInv;
600 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
602 solverConstraint.
m_cfm = cfmSlip;
608 btSolverConstraint&
btSequentialImpulseConstraintSolver::addFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
613 colObj0, colObj1, relaxation, infoGlobal, desiredVelocity, cfmSlip);
614 return solverConstraint;
636 solverConstraint.
m_friction = combinedTorsionalFriction;
667 rel_vel = vel1Dotn + vel2Dotn;
673 solverConstraint.
m_rhs = velocityImpulse;
674 solverConstraint.
m_cfm = cfmSlip;
680 btSolverConstraint&
btSequentialImpulseConstraintSolver::addTorsionalFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
btScalar combinedTorsionalFriction,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
btScalar desiredVelocity,
btScalar cfmSlip)
685 colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
686 return solverConstraint;
692 int solverBodyId = -1;
699 if (solverBodyId < 0)
717 const int INVALID_SOLVER_BODY_ID = -1;
724 if (solverBodyId == INVALID_SOLVER_BODY_ID)
737 if (!isMultiBodyType)
753 #else // BT_THREADSAFE
755 int solverBodyIdA = -1;
787 return solverBodyIdA;
788 #endif // BT_THREADSAFE
793 int solverBodyIdA,
int solverBodyIdB,
812 relaxation = infoGlobal.
m_sor;
850 #ifdef COMPUTE_IMPULSE_DENOM
867 #endif //COMPUTE_IMPULSE_DENOM
869 btScalar denom = relaxation / (denom0 + denom1 + cfm);
940 btScalar rel_vel = vel1Dotn + vel2Dotn;
943 btScalar velocityError = restitution - rel_vel;
949 velocityError -= penetration * invTimeStep;
953 positionalError = -penetration * erp * invTimeStep;
962 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
968 solverConstraint.
m_rhs = velocityImpulse;
978 int solverBodyIdA,
int solverBodyIdB,
1015 int rollingFriction = 1;
1037 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
1048 setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, relaxation, rel_pos1, rel_pos2);
1057 addTorsionalFrictionConstraint(cp.
m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, cp.
m_combinedSpinningFriction, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
1067 if (axis0.
length() > 0.001)
1070 if (axis1.
length() > 0.001)
1101 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1109 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1118 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1124 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1135 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion1, cp.
m_frictionCFM);
1138 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion2, cp.
m_frictionCFM);
1151 for (i = 0; i < numManifolds; i++)
1153 manifold = manifoldPtr[i];
1207 info2.erp = infoGlobal.
m_erp;
1215 info2.m_constraintError = &currentConstraintRow->
m_rhs;
1218 info2.cfm = &currentConstraintRow->
m_cfm;
1219 info2.m_lowerLimit = &currentConstraintRow->
m_lowerLimit;
1220 info2.m_upperLimit = &currentConstraintRow->
m_upperLimit;
1278 rel_vel = vel1Dotn + vel2Dotn;
1281 btScalar velocityError = restitution - rel_vel * info2.m_damping;
1284 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
1293 for (
int j = 0; j < numConstraints; j++)
1300 int totalNumRows = 0;
1304 for (
int i = 0; i < numConstraints; i++)
1316 if (constraints[i]->isEnabled())
1332 for (
int i = 0; i < numConstraints; i++)
1338 btAssert(currentRow < totalNumRows);
1348 convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
1357 for (
int i = 0; i < numBodies; i++)
1363 #endif // BT_THREADSAFE
1371 for (
int i = 0; i < numBodies; i++)
1415 #ifdef BT_ADDITIONAL_DEBUG
1417 for (
int i = 0; i < numConstraints; i++)
1425 for (
int b = 0; b < numBodies; b++)
1438 for (
int b = 0; b < numBodies; b++)
1451 for (
int i = 0; i < numManifolds; i++)
1453 if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
1456 for (
int b = 0; b < numBodies; b++)
1458 if (manifoldPtr[i]->getBody0() == bodies[b])
1466 if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
1469 for (
int b = 0; b < numBodies; b++)
1471 if (manifoldPtr[i]->getBody1() == bodies[b])
1480 #endif //BT_ADDITIONAL_DEBUG
1505 for (i = 0; i < numNonContactPool; i++)
1509 for (i = 0; i < numConstraintPool; i++)
1513 for (i = 0; i < numFrictionPool; i++)
1525 btScalar leastSquaresResidual = 0.f;
1535 for (
int j = 0; j < numNonContactPool; ++j)
1546 for (
int j = 0; j < numConstraintPool; ++j)
1554 for (
int j = 0; j < numFrictionPool; ++j)
1572 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1578 for (
int j = 0; j < numConstraints; j++)
1580 if (constraints[j]->isEnabled())
1596 for (
int c = 0; c < numPoolConstraints; c++)
1603 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1607 bool applyFriction =
true;
1619 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1633 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1645 for (j = 0; j < numPoolConstraints; j++)
1649 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1655 for (j = 0; j < numFrictionPoolConstraints; j++)
1666 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1672 for (
int j = 0; j < numRollingFrictionPoolConstraints; j++)
1678 btScalar rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction * totalImpulse;
1679 if (rollingFrictionMagnitude > rollingFrictionConstraint.
m_friction)
1680 rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction;
1682 rollingFrictionConstraint.
m_lowerLimit = -rollingFrictionMagnitude;
1683 rollingFrictionConstraint.
m_upperLimit = rollingFrictionMagnitude;
1686 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1690 return leastSquaresResidual;
1695 BT_PROFILE(
"solveGroupCacheFriendlySplitImpulseIterations");
1700 for (iteration = 0; iteration < infoGlobal.
m_numIterations; iteration++)
1702 btScalar leastSquaresResidual = 0.f;
1706 for (j = 0; j < numPoolConstraints; j++)
1711 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1714 if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.
m_numIterations - 1))
1716 #ifdef VERBOSE_RESIDUAL_PRINTF
1717 printf(
"residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
1728 BT_PROFILE(
"solveGroupCacheFriendlyIterations");
1736 for (
int iteration = 0; iteration <
maxIterations; iteration++)
1743 #ifdef VERBOSE_RESIDUAL_PRINTF
1763 for (
int j = iBegin; j < iEnd; j++)
1783 for (
int j = iBegin; j < iEnd; j++)
1806 for (
int i = iBegin; i < iEnd; i++)