#ifndef BT_MATRIX3x3_H
#define BT_MATRIX3x3_H

#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))

#if defined(BT_USE_SSE)
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
#endif

#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// SIMD copy constructor: each row is copied as one 128-bit vector
	m_el[0].mVec128 = rhs.m_el[0].mVec128;
	m_el[1].mVec128 = rhs.m_el[1].mVec128;
	m_el[2].mVec128 = rhs.m_el[2].mVec128;

	// SIMD assignment operator
	m_el[0].mVec128 = m.m_el[0].mVec128;
	m_el[1].mVec128 = m.m_el[1].mVec128;
	m_el[2].mVec128 = m.m_el[2].mVec128;

	// Scalar copy constructor: copy row by row
	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];

	// Scalar assignment operator
	m_el[0] = other.m_el[0];
	m_el[1] = other.m_el[1];
	m_el[2] = other.m_el[2];
	// getColumn: gather the i-th element of each row
	return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
	// setFromOpenGLSubMatrix: read a column-major 4x4 array
	m_el[2].setValue(m[2], m[6], m[10]);
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	// setRotation, SSE path
	__m128 vs, Q = q.get128();
	__m128i Qi = btCastfTo128i(Q);
	__m128 Y, Z;
	__m128 V1, V2, V3;
	__m128 V11, V21, V31;
	__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
	__m128i NQi = btCastfTo128i(NQ);

	V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));
	V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));
	V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));
	V1 = _mm_xor_ps(V1, vMPPP);

	V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));
	V21 = _mm_unpackhi_ps(Q, Q);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));
	// ...
	V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));
	V21 = _mm_xor_ps(V21, vMPPP);
	V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));
	V31 = _mm_xor_ps(V31, vMPPP);
	Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));
	Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));
	// ...
	vs = _mm_load_ss(&s);
	// ...
	vs = bt_splat3_ps(vs, 0);  // broadcast s into the low three lanes
	btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
	btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
	btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
	btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
	setValue(
		btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
		xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
		xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
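// The scalar path above is the standard quaternion-to-matrix expansion,
// scaled by s (2/|q|^2 in the full source) so that non-unit quaternions
// still map to a proper rotation. A standalone usage sketch (not part of
// this header), assuming the Bullet LinearMath headers are on the include
// path; quatRotate is the free helper from btQuaternion.h:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btQuaternion q(btVector3(1, 2, 2).normalized(), btScalar(0.9));  // axis, angle
	btMatrix3x3 R;
	R.setRotation(q);

	btVector3 v(btScalar(0.3), btScalar(-1.0), btScalar(2.5));
	btVector3 byMatrix = R * v;
	btVector3 byQuat = quatRotate(q, v);  // should agree with the matrix product

	printf("matrix: %f %f %f\n", (double)byMatrix.x(), (double)byMatrix.y(), (double)byMatrix.z());
	printf("quat:   %f %f %f\n", (double)byQuat.x(), (double)byQuat.y(), (double)byQuat.z());
	return 0;
}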
	// setEulerYPR forwards to setEulerZYX with the arguments reordered
	setEulerZYX(roll, pitch, yaw);
	setValue(cj * ch, sj * sc - cs, sj * cc + ss,
			 cj * sh, sj * ss + cc, sj * cs - sc,
			 -sj, cj * si, cj * ci);
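// Reading the setValue call above (with ci/si, cj/sj, ch/sh the cosines and
// sines of the X, Y and Z angles), the composed matrix is
// Rz(eulerZ) * Ry(eulerY) * Rx(eulerX). A standalone sketch checking that
// against explicit axis rotations, assuming the Bullet headers:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	const btScalar x = btScalar(0.3), y = btScalar(-0.7), z = btScalar(1.2);

	btMatrix3x3 R;
	R.setEulerZYX(x, y, z);

	btMatrix3x3 Rx(btQuaternion(btVector3(1, 0, 0), x));
	btMatrix3x3 Ry(btQuaternion(btVector3(0, 1, 0), y));
	btMatrix3x3 Rz(btQuaternion(btVector3(0, 0, 1), z));
	btMatrix3x3 P = Rz * Ry * Rx;  // apply X first, then Y, then Z

	for (int i = 0; i < 3; i++)
		printf("%+.4f %+.4f %+.4f | %+.4f %+.4f %+.4f\n",
			   (double)R[i].x(), (double)R[i].y(), (double)R[i].z(),
			   (double)P[i].x(), (double)P[i].y(), (double)P[i].z());
	return 0;
}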
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// ...

	// getIdentity
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	static const btMatrix3x3
		identityMatrix(v1000, v0100, v0010);

	return identityMatrix;
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	// getOpenGLSubMatrix, SSE path: transpose the rows into columns
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128* vm = (__m128*)m;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));  // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));  // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

#elif defined(BT_USE_NEON)
	// NEON path: transpose via vtrn, zeroing the fourth lane
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4_t* vm = (float32x4_t*)m;
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);
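// getOpenGLSubMatrix writes the rotation into a column-major 4x4 array
// (slots 0-2, 4-6, 8-10, with the w slots zeroed), and setFromOpenGLSubMatrix
// reads the same slots back, matching the m[2], m[6], m[10] access seen
// earlier. A standalone round-trip sketch:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 R(btQuaternion(btVector3(0, 1, 0), btScalar(0.5)));

	btScalar gl[16];
	for (int i = 0; i < 16; i++) gl[i] = btScalar(0.0);
	gl[15] = btScalar(1.0);    // column-major homogeneous corner
	R.getOpenGLSubMatrix(gl);  // fills the rotational 3x3 sub-matrix

	btMatrix3x3 back;
	back.setFromOpenGLSubMatrix(gl);

	printf("round-trip equal: %s\n", (R == back) ? "yes" : "no");
	return 0;
}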
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// getRotation, SIMD path
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
	// ...
	// trace > 0: the w component is dominant
	temp.f[0] = m_el[2].y() - m_el[1].z();
	temp.f[1] = m_el[0].z() - m_el[2].x();
	temp.f[2] = m_el[1].x() - m_el[0].y();
	// ...
	// trace <= 0: choose i, j, k from the largest diagonal element
	if (m_el[0].x() < m_el[1].y())
	// ...
	if (m_el[1].y() < m_el[2].z())
	// ...
	if (m_el[0].x() < m_el[2].z())
	// ...
	x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
	// ...
	temp.f[3] = (m_el[k][j] - m_el[j][k]);
	temp.f[j] = (m_el[j][i] + m_el[i][j]);
	temp.f[k] = (m_el[k][i] + m_el[i][k]);
	// getRotation, scalar path
	btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

	btScalar temp[4];

	if (trace > btScalar(0.0))
	{
		btScalar s = btSqrt(trace + btScalar(1.0));
		temp[3] = (s * btScalar(0.5));
		s = btScalar(0.5) / s;

		temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
		temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
		temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
	}
	else
	{
		int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
		int j = (i + 1) % 3;
		int k = (i + 2) % 3;

		btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
		temp[i] = s * btScalar(0.5);
		s = btScalar(0.5) / s;

		temp[3] = (m_el[k][j] - m_el[j][k]) * s;
		temp[j] = (m_el[j][i] + m_el[i][j]) * s;
		temp[k] = (m_el[k][i] + m_el[i][k]) * s;
	}
	q.setValue(temp[0], temp[1], temp[2], temp[3]);
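// Both getRotation paths are the classic trace-based matrix-to-quaternion
// conversion: a positive trace means w is the largest component and is
// recovered first; otherwise the largest diagonal element selects which of
// x, y, z to recover first, keeping the divisor s well away from zero.
// A standalone round-trip sketch (dot() is the free quaternion helper):

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btQuaternion q(btVector3(1, -1, 2).normalized(), btScalar(2.1));
	btMatrix3x3 R(q);

	btQuaternion out;
	R.getRotation(out);

	// |dot| close to 1 means the same rotation (q and -q are equivalent).
	printf("dot(q, out) = %f\n", (double)dot(q, out));
	return 0;
}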
	// getEulerZYX: check that pitch is not at a singularity
	if (btFabs(m_el[2].x()) >= 1)
	{
		// gimbal lock: yaw is pinned and the remainder folds into roll
		// ...
		// gimbal locked up
		euler_out.roll = euler_out.pitch + delta;
		euler_out2.roll = euler_out.pitch + delta;
		// ...
		// gimbal locked down
		euler_out.roll = -euler_out.pitch + delta;
		euler_out2.roll = -euler_out.pitch + delta;
	}
	else
	{
		euler_out.pitch = -btAsin(m_el[2].x());
		euler_out2.pitch = SIMD_PI - euler_out.pitch;

		euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
								 m_el[2].z() / btCos(euler_out.pitch));
		euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
								  m_el[2].z() / btCos(euler_out2.pitch));

		euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
								m_el[0].x() / btCos(euler_out.pitch));
		euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
								 m_el[0].x() / btCos(euler_out2.pitch));
	}

	if (solution_number == 1)
	{
		yaw = euler_out.yaw;
		pitch = euler_out.pitch;
		roll = euler_out.roll;
	}
	else
	{
		yaw = euler_out2.yaw;
		pitch = euler_out2.pitch;
		roll = euler_out2.roll;
	}
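// Away from the pitch singularity every rotation matrix has two
// (yaw, pitch, roll) decompositions, which is why both euler_out and
// euler_out2 are filled and solution_number picks one. A standalone sketch
// printing both solutions; either one rebuilds the same matrix up to
// floating-point rounding:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 R;
	R.setEulerZYX(btScalar(0.4), btScalar(0.8), btScalar(-1.1));

	for (unsigned int sol = 1; sol <= 2; sol++)
	{
		btScalar yaw, pitch, roll;
		R.getEulerZYX(yaw, pitch, roll, sol);

		btMatrix3x3 rebuilt;
		rebuilt.setEulerZYX(roll, pitch, yaw);  // X, Y, Z angle order
		printf("solution %u: yaw=%+.4f pitch=%+.4f roll=%+.4f\n",
			   sol, (double)yaw, (double)pitch, (double)roll);
	}
	return 0;
}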
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
	return btMatrix3x3(
		m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
		m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
		m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
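// scaled() multiplies column j by s[j], i.e. it is right-multiplication by
// diag(s) (a scale in the matrix's local axes), not a per-row scale. A
// standalone sketch with the diagonal matrix written out:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 M(btQuaternion(btVector3(1, 1, 0).normalized(), btScalar(0.6)));
	btVector3 s(btScalar(2.0), btScalar(3.0), btScalar(0.5));

	btMatrix3x3 diag(s.x(), btScalar(0.0), btScalar(0.0),
					 btScalar(0.0), s.y(), btScalar(0.0),
					 btScalar(0.0), btScalar(0.0), s.z());

	btMatrix3x3 a = M.scaled(s);
	btMatrix3x3 b = M * diag;
	printf("M.scaled(s) == M * diag(s): %s\n", (a == b) ? "yes" : "no");
	return 0;
}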
	// tdotx: dot v with column 0 (row 0 of the transpose)
	return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();

	// tdoty: dot v with column 1
	return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();

	// tdotz: dot v with column 2
	return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
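// tdotx/tdoty/tdotz dot v against the matrix columns, i.e. the rows of the
// transpose; they are the building blocks of the vector-times-matrix
// operator and transposeTimes. A standalone sketch of the equivalence:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 M(btQuaternion(btVector3(0, 0, 1), btScalar(1.3)));
	btVector3 v(btScalar(1.0), btScalar(2.0), btScalar(3.0));

	btVector3 a = v * M;                              // vector * matrix
	btVector3 b(M.tdotx(v), M.tdoty(v), M.tdotz(v));  // explicit tdots
	btVector3 c = M.transpose() * v;                  // M^T * v

	printf("%f %f %f\n%f %f %f\n%f %f %f\n",
		   (double)a.x(), (double)a.y(), (double)a.z(),
		   (double)b.x(), (double)b.y(), (double)b.z(),
		   (double)c.x(), (double)c.y(), (double)c.z());
	return 0;
}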
	// extractRotation: iterate until convergence or the iteration cap
	for (iter = 0; iter < maxIter; iter++)
	// ...

	// diagonalize: Jacobi sweeps
	for (int step = maxSteps; step > 0; step--)
	// ...
	btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
	// ...
	t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
					 : 1 / (theta - btSqrt(1 + theta2));
	cos = 1 / btSqrt(1 + t * t);
	// ...
	// for large theta^2, approximate to avoid overflow
	t = 1 / (theta * (2 + btScalar(0.5) / theta2));
	// ...
	// apply the Givens rotation: this = J^T * this * J
	m_el[p][q] = m_el[q][p] = 0;
	m_el[p][p] -= t * mpq;
	m_el[q][q] += t * mpq;
	// ...
	m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
	m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;

	// accumulate the rotation: rot = rot * J
	for (int i = 0; i < 3; i++)
	// ...
	row[p] = cos * mrp - sin * mrq;
	row[q] = cos * mrq + sin * mrp;
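// diagonalize runs classic Jacobi iterations: each step zeroes one
// off-diagonal element (p, q) with a Givens rotation, folds the change into
// the diagonal, and accumulates the rotation into rot, so that
// old = rot * new * rot^T. A standalone sketch on a symmetric matrix:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 S(btScalar(2.0), btScalar(0.5), btScalar(0.1),
				  btScalar(0.5), btScalar(3.0), btScalar(0.2),
				  btScalar(0.1), btScalar(0.2), btScalar(1.0));
	const btMatrix3x3 original = S;

	btMatrix3x3 rot;
	S.diagonalize(rot, btScalar(1.0e-9), 16);  // S is diagonal afterwards

	btMatrix3x3 rebuilt = rot * S * rot.transpose();
	for (int i = 0; i < 3; i++)
		printf("%+.5f %+.5f %+.5f | %+.5f %+.5f %+.5f\n",
			   (double)original[i].x(), (double)original[i].y(), (double)original[i].z(),
			   (double)rebuilt[i].x(), (double)rebuilt[i].y(), (double)rebuilt[i].z());
	return 0;
}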
	// cofac: cofactor from rows r1, r2 and columns c1, c2
	return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	// row 0
	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	// row 1
	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	// row 2
	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
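// Each path above computes, for row i of this matrix,
// row_i.x * m[0] + row_i.y * m[1] + row_i.z * m[2], which is row i of the
// product this * m, so A *= B matches the non-mutating product. A sketch:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 A(btQuaternion(btVector3(1, 0, 0), btScalar(0.4)));
	btMatrix3x3 B(btQuaternion(btVector3(0, 1, 0), btScalar(-0.9)));

	btMatrix3x3 product = A * B;
	A *= B;

	printf("A *= B equals A * B: %s\n", (A == product) ? "yes" : "within rounding");
	return 0;
}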
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// ...

	// operator*(matrix, scalar): broadcast k and scale each row
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x() * k, m[0].y() * k, m[0].z() * k,
		m[1].x() * k, m[1].y() * k, m[1].z() * k,
		m[2].x() * k, m[2].y() * k, m[2].z() * k);
	// operator+: element-wise sum of the rows
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#else
	// scalar path: element-wise setValue, last argument shown
	// ...
	m1[2][2] + m2[2][2]);
#endif

	// operator-: element-wise difference
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#else
	// ...
	m1[2][2] - m2[2][2]);
#endif
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// ...

	// determinant: triple product of the three rows
	return btTriple((*this)[0], (*this)[1], (*this)[2]);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// absolute: clear the sign bit of every element
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// transpose, SSE path: same unpack/shuffle sequence as getOpenGLSubMatrix
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));  // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));  // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

#elif defined(BT_USE_NEON)
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);
	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
					   cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
					   cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
	btScalar det = (*this)[0].dot(co);
	btFullAssert(det != btScalar(0.0));
	btScalar s = btScalar(1.0) / det;
	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
					   co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
					   co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
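// inverse() reuses the first column of cofactors for the determinant
// (det = row0 . co), so the result is the adjoint scaled by 1/det. A
// standalone sketch checking both identities:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 M(btScalar(2.0), btScalar(1.0), btScalar(0.0),
				  btScalar(0.0), btScalar(3.0), btScalar(1.0),
				  btScalar(1.0), btScalar(0.0), btScalar(4.0));

	btMatrix3x3 I = M * M.inverse();  // should be close to identity
	btMatrix3x3 viaAdjoint = M.adjoint() * (btScalar(1.0) / M.determinant());

	for (int i = 0; i < 3; i++)
		printf("%+.5f %+.5f %+.5f\n",
			   (double)I[i].x(), (double)I[i].y(), (double)I[i].z());
	printf("adjoint/det == inverse: %s\n",
		   (viaAdjoint == M.inverse()) ? "yes" : "within rounding");
	return 0;
}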
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// transposeTimes: accumulate row by row so the transpose is never built
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
	row = m_el[1].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
#elif defined BT_USE_NEON
	static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
#else
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
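// transposeTimes computes this^T * m without materializing the transpose
// (each element is a column-column dot product), and timesTranspose below
// computes this * m^T. A standalone sketch of both identities:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 A(btQuaternion(btVector3(1, 2, 0).normalized(), btScalar(0.7)));
	btMatrix3x3 B(btQuaternion(btVector3(0, 1, 1).normalized(), btScalar(-0.3)));

	printf("A^T*B matches: %s\n",
		   (A.transposeTimes(B) == A.transpose() * B) ? "yes" : "within rounding");
	printf("A*B^T matches: %s\n",
		   (A.timesTranspose(B) == A * B.transpose()) ? "yes" : "within rounding");
	return 0;
}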
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// timesTranspose: multiply against the columns of m via its transpose
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // transpose() already zeroes the w lanes

	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));

#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();

	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// operator*(matrix, vector): three dot products at once
	return v.dot3(m[0], m[1], m[2]);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// operator*(vector, matrix): combine the rows weighted by v
	const __m128 vv = v.mVec128;

	__m128 c0 = bt_splat_ps(vv, 0);
	__m128 c1 = bt_splat_ps(vv, 1);
	__m128 c2 = bt_splat_ps(vv, 2);

	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	__m128 c0 = bt_splat_ps(m10, 0);
	__m128 c1 = bt_splat_ps(m11, 0);
	__m128 c2 = bt_splat_ps(m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps(m10, 1);
	__m128 c1_1 = bt_splat_ps(m11, 1);
	__m128 c2_1 = bt_splat_ps(m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	m10 = bt_splat_ps(m10, 2);
	m11 = bt_splat_ps(m11, 2);
	m12 = bt_splat_ps(m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 c0, c1, c2;

	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

	c0 = _mm_and_ps(c0, c1);
	c0 = _mm_and_ps(c0, c2);

	int m = _mm_movemask_ps((__m128)c0);
	return (0x7 == (m & 0x7));  // only the x, y, z lanes have to match
#else
	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
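// The comparison is exact, lane for lane (the SSE path masks the movemask
// down to the x, y, z bits). Matrices arriving via different arithmetic
// paths usually need a tolerance instead; a hypothetical helper, not part
// of Bullet:

#include "LinearMath/btMatrix3x3.h"

static bool matrixFuzzyEqual(const btMatrix3x3& a, const btMatrix3x3& b,
							 btScalar eps = SIMD_EPSILON)
{
	for (int i = 0; i < 3; i++)
		for (int j = 0; j < 3; j++)
			if (btFabs(a[i][j] - b[i][j]) > eps)
				return false;
	return true;
}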
	// serialize: write each row
	for (int i = 0; i < 3; i++)
		m_el[i].serialize(dataOut.m_el[i]);

	// serializeFloat
	for (int i = 0; i < 3; i++)
		m_el[i].serializeFloat(dataOut.m_el[i]);

	// deSerialize
	for (int i = 0; i < 3; i++)
		m_el[i].deSerialize(dataIn.m_el[i]);

	// deSerializeFloat
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeFloat(dataIn.m_el[i]);

	// deSerializeDouble
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeDouble(dataIn.m_el[i]);
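// btMatrix3x3Data resolves to the float or double struct via the #define at
// the top of this header, and the helpers above forward each row to the
// btVector3 serializer. A standalone round-trip sketch:

#include <cstdio>
#include "LinearMath/btMatrix3x3.h"

int main()
{
	btMatrix3x3 M(btQuaternion(btVector3(0, 0, 1), btScalar(0.25)));

	btMatrix3x3Data data;
	M.serialize(data);

	btMatrix3x3 restored;
	restored.deSerialize(data);

	printf("serialize round-trip equal: %s\n", (M == restored) ? "yes" : "no");
	return 0;
}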
#endif  //BT_MATRIX3x3_H