Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 #ifndef BT_MATRIX3x3_H
16 #define BT_MATRIX3x3_H
17 
18 #include "btVector3.h"
19 #include "btQuaternion.h"
20 #include <stdio.h>
21 
22 #ifdef BT_USE_SSE
23 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
24 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
25 #define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
26 #endif
27 
28 #if defined(BT_USE_SSE)
29 #define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
30 #define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
31 #define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
32 #elif defined(BT_USE_NEON)
33 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
36 #endif
37 
38 #ifdef BT_USE_DOUBLE_PRECISION
39 #define btMatrix3x3Data btMatrix3x3DoubleData
40 #else
41 #define btMatrix3x3Data btMatrix3x3FloatData
42 #endif //BT_USE_DOUBLE_PRECISION
43 
48 {
50  btVector3 m_el[3];
51 
52 public:
55 
56  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
57 
59  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
60  /*
61  template <typename btScalar>
62  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
63  {
64  setEulerYPR(yaw, pitch, roll);
65  }
66  */
68  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
69  const btScalar& yx, const btScalar& yy, const btScalar& yz,
70  const btScalar& zx, const btScalar& zy, const btScalar& zz)
71  {
72  setValue(xx, xy, xz,
73  yx, yy, yz,
74  zx, zy, zz);
75  }
76 
77 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
78  SIMD_FORCE_INLINE btMatrix3x3(const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2)
79  {
80  m_el[0].mVec128 = v0;
81  m_el[1].mVec128 = v1;
82  m_el[2].mVec128 = v2;
83  }
84 
85  SIMD_FORCE_INLINE btMatrix3x3(const btVector3& v0, const btVector3& v1, const btVector3& v2)
86  {
87  m_el[0] = v0;
88  m_el[1] = v1;
89  m_el[2] = v2;
90  }
91 
92  // Copy constructor
94  {
95  m_el[0].mVec128 = rhs.m_el[0].mVec128;
96  m_el[1].mVec128 = rhs.m_el[1].mVec128;
97  m_el[2].mVec128 = rhs.m_el[2].mVec128;
98  }
99 
100  // Assignment Operator
101  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
102  {
103  m_el[0].mVec128 = m.m_el[0].mVec128;
104  m_el[1].mVec128 = m.m_el[1].mVec128;
105  m_el[2].mVec128 = m.m_el[2].mVec128;
106 
107  return *this;
108  }
109 
110 #else
111 
114  {
115  m_el[0] = other.m_el[0];
116  m_el[1] = other.m_el[1];
117  m_el[2] = other.m_el[2];
118  }
119 
122  {
123  m_el[0] = other.m_el[0];
124  m_el[1] = other.m_el[1];
125  m_el[2] = other.m_el[2];
126  return *this;
127  }
128 
129 #endif
130 
134  {
135  return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
136  }
137 
140  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
141  {
142  btFullAssert(0 <= i && i < 3);
143  return m_el[i];
144  }
145 
149  {
150  btFullAssert(0 <= i && i < 3);
151  return m_el[i];
152  }
153 
157  {
158  btFullAssert(0 <= i && i < 3);
159  return m_el[i];
160  }
161 
165  btMatrix3x3& operator*=(const btMatrix3x3& m);
166 
170  btMatrix3x3& operator+=(const btMatrix3x3& m);
171 
175  btMatrix3x3& operator-=(const btMatrix3x3& m);
176 
180  {
181  m_el[0].setValue(m[0], m[4], m[8]);
182  m_el[1].setValue(m[1], m[5], m[9]);
183  m_el[2].setValue(m[2], m[6], m[10]);
184  }
195  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
196  const btScalar& yx, const btScalar& yy, const btScalar& yz,
197  const btScalar& zx, const btScalar& zy, const btScalar& zz)
198  {
199  m_el[0].setValue(xx, xy, xz);
200  m_el[1].setValue(yx, yy, yz);
201  m_el[2].setValue(zx, zy, zz);
202  }
203 
/** @brief Set this matrix from the quaternion q.
 *  The products are scaled by s = 2 / q.length2(), so the scalar branch yields
 *  the rotation of q normalised to unit length. A zero-length q is rejected
 *  only by btFullAssert; otherwise the division below is by zero.
 *  @param q Source quaternion. */
206  void setRotation(const btQuaternion& q)
207  {
208  btScalar d = q.length2();
209  btFullAssert(d != btScalar(0.0));
210  btScalar s = btScalar(2.0) / d;
211 
212 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
 // SSE branch: builds the three rows V1, V2, V3 from shuffled and sign-flipped
 // copies of q. NOTE(review): presumably produces the same matrix as the scalar
 // branch below; the shuffle algebra was not re-derived here.
213  __m128 vs, Q = q.get128();
214  __m128i Qi = btCastfTo128i(Q);
215  __m128 Y, Z;
216  __m128 V1, V2, V3;
217  __m128 V11, V21, V31;
218  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
219  __m128i NQi = btCastfTo128i(NQ);
220 
221  V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3))); // Y X Z W
222  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3)); // -X -X Y W
223  V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3))); // Z Y X W
224  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
225 
226  V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3))); // Y Y X W
227  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
228  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3)); // X Z -X -W
229 
230  V2 = V2 * V1; //
231  V1 = V1 * V11; //
232  V3 = V3 * V31; //
233 
234  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3)); // -Z -W Y W
235  V11 = V11 * V21; //
236  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
237  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3)); // W W -Y -W
238  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
239  Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3))); // -W -Z -X -W
240  Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3))); // Y X Y W
241 
 // Load the scalar scale factor s into the low lane; it is broadcast further down.
242  vs = _mm_load_ss(&s);
243  V21 = V21 * Y;
244  V31 = V31 * Z;
245 
246  V1 = V1 + V11;
247  V2 = V2 + V21;
248  V3 = V3 + V31;
249 
250  vs = bt_splat3_ps(vs, 0);
251  // s ready
252  V1 = V1 * vs;
253  V2 = V2 * vs;
254  V3 = V3 * vs;
255 
 // Add the identity rows, i.e. the "1 - ..." terms on the diagonal.
256  V1 = V1 + v1000;
257  V2 = V2 + v0100;
258  V3 = V3 + v0010;
259 
260  m_el[0] = V1;
261  m_el[1] = V2;
262  m_el[2] = V3;
263 #else
 // Scalar branch: xs, ys, zs are the vector components pre-multiplied by s,
 // so every product below already carries the 2 / |q|^2 factor.
264  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
265  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
266  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
267  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
268  setValue(
269  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
270  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
271  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
272 #endif
273  }
274 
280  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
281  {
282  setEulerZYX(roll, pitch, yaw);
283  }
284 
294  void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
295  {
297  btScalar ci(btCos(eulerX));
298  btScalar cj(btCos(eulerY));
299  btScalar ch(btCos(eulerZ));
300  btScalar si(btSin(eulerX));
301  btScalar sj(btSin(eulerY));
302  btScalar sh(btSin(eulerZ));
303  btScalar cc = ci * ch;
304  btScalar cs = ci * sh;
305  btScalar sc = si * ch;
306  btScalar ss = si * sh;
307 
308  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
309  cj * sh, sj * ss + cc, sj * cs - sc,
310  -sj, cj * si, cj * ci);
311  }
312 
314  void setIdentity()
315  {
316 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
317  m_el[0] = v1000;
318  m_el[1] = v0100;
319  m_el[2] = v0010;
320 #else
321  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
322  btScalar(0.0), btScalar(1.0), btScalar(0.0),
323  btScalar(0.0), btScalar(0.0), btScalar(1.0));
324 #endif
325  }
326 
327  static const btMatrix3x3& getIdentity()
328  {
329 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
330  static const btMatrix3x3
331  identityMatrix(v1000, v0100, v0010);
332 #else
333  static const btMatrix3x3
334  identityMatrix(
335  btScalar(1.0), btScalar(0.0), btScalar(0.0),
336  btScalar(0.0), btScalar(1.0), btScalar(0.0),
337  btScalar(0.0), btScalar(0.0), btScalar(1.0));
338 #endif
339  return identityMatrix;
340  }
341 
/** @brief Write the transpose of this matrix into m as three 4-element groups.
 *  The scalar branch fills m[0..11]: each group of four holds one column of
 *  this matrix followed by a zero, so m must have room for 12 scalars.
 *  @param m Destination array.
 *  NOTE(review): the SSE and NEON branches store through a pointer cast to a
 *  128-bit vector type, which presumably requires m to be 16-byte aligned and
 *  btScalar to be float -- confirm against callers. */
344  void getOpenGLSubMatrix(btScalar * m) const
345  {
346 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
347  __m128 v0 = m_el[0].mVec128;
348  __m128 v1 = m_el[1].mVec128;
349  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
350  __m128* vm = (__m128*)m;
351  __m128 vT;
352 
353  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
354 
355  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
356  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
357 
358  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
359  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
360  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
361 
 // Store the three transposed rows as consecutive 4-float groups.
362  vm[0] = v0;
363  vm[1] = v1;
364  vm[2] = v2;
365 #elif defined(BT_USE_NEON)
366  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
367  static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
368  float32x4_t* vm = (float32x4_t*)m;
369  float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
370  float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
371  float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
372  float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
373  float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
374  float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
375 
376  vm[0] = v0;
377  vm[1] = v1;
378  vm[2] = v2;
379 #else
 // Scalar branch: group k (m[4k..4k+2]) receives column k; m[4k+3] is zero.
380  m[0] = btScalar(m_el[0].x());
381  m[1] = btScalar(m_el[1].x());
382  m[2] = btScalar(m_el[2].x());
383  m[3] = btScalar(0.0);
384  m[4] = btScalar(m_el[0].y());
385  m[5] = btScalar(m_el[1].y());
386  m[6] = btScalar(m_el[2].y());
387  m[7] = btScalar(0.0);
388  m[8] = btScalar(m_el[0].z());
389  m[9] = btScalar(m_el[1].z());
390  m[10] = btScalar(m_el[2].z());
391  m[11] = btScalar(0.0);
392 #endif
393  }
394 
/** @brief Convert this matrix to a quaternion and store it in q.
 *  Two cases are distinguished by the trace (sum of the diagonal):
 *   - trace > 0: the fourth component is derived from sqrt(trace + 1);
 *   - otherwise: the largest diagonal element selects index i, and component i
 *     is derived from sqrt(m[i][i] - m[j][j] - m[k][k] + 1).
 *  Both preprocessor branches follow this scheme; the SIMD branch stores the
 *  unscaled components first and multiplies q by 0.5 / sqrt(x) at the end.
 *  @param q Receives the result; its previous value is not read.
 *  NOTE(review): the matrix is presumably expected to be a pure rotation;
 *  nothing here checks that the value under the square root is positive. */
397  void getRotation(btQuaternion & q) const
398  {
399 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
400  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
401  btScalar s, x;
402 
 // Scalar view onto a SIMD register so components can be written by index.
403  union {
404  btSimdFloat4 vec;
405  btScalar f[4];
406  } temp;
407 
408  if (trace > btScalar(0.0))
409  {
410  x = trace + btScalar(1.0);
411 
412  temp.f[0] = m_el[2].y() - m_el[1].z();
413  temp.f[1] = m_el[0].z() - m_el[2].x();
414  temp.f[2] = m_el[1].x() - m_el[0].y();
415  temp.f[3] = x;
416  //temp.f[3]= s * btScalar(0.5);
417  }
418  else
419  {
 // Pick i as the index of the largest diagonal element; j and k follow cyclically.
420  int i, j, k;
421  if (m_el[0].x() < m_el[1].y())
422  {
423  if (m_el[1].y() < m_el[2].z())
424  {
425  i = 2;
426  j = 0;
427  k = 1;
428  }
429  else
430  {
431  i = 1;
432  j = 2;
433  k = 0;
434  }
435  }
436  else
437  {
438  if (m_el[0].x() < m_el[2].z())
439  {
440  i = 2;
441  j = 0;
442  k = 1;
443  }
444  else
445  {
446  i = 0;
447  j = 1;
448  k = 2;
449  }
450  }
451 
452  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
453 
454  temp.f[3] = (m_el[k][j] - m_el[j][k]);
455  temp.f[j] = (m_el[j][i] + m_el[i][j]);
456  temp.f[k] = (m_el[k][i] + m_el[i][k]);
457  temp.f[i] = x;
458  //temp.f[i] = s * btScalar(0.5);
459  }
460 
 // Common tail: scale all four stored components by 0.5 / sqrt(x).
461  s = btSqrt(x);
462  q.set128(temp.vec);
463  s = btScalar(0.5) / s;
464 
465  q *= s;
466 #else
467  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
468 
 // temp[0..3] are handed to q.setValue() in this order at the end.
469  btScalar temp[4];
470 
471  if (trace > btScalar(0.0))
472  {
473  btScalar s = btSqrt(trace + btScalar(1.0));
474  temp[3] = (s * btScalar(0.5));
475  s = btScalar(0.5) / s;
476 
477  temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
478  temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
479  temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
480  }
481  else
482  {
 // i = index of the largest diagonal element; j, k = the next two indices modulo 3.
483  int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
484  int j = (i + 1) % 3;
485  int k = (i + 2) % 3;
486 
487  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
488  temp[i] = s * btScalar(0.5);
489  s = btScalar(0.5) / s;
490 
491  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
492  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
493  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
494  }
495  q.setValue(temp[0], temp[1], temp[2], temp[3]);
496 #endif
497  }
498 
503  void getEulerYPR(btScalar & yaw, btScalar & pitch, btScalar & roll) const
504  {
505  // first use the normal calculus
506  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
507  pitch = btScalar(btAsin(-m_el[2].x()));
508  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
509 
510  // on pitch = +/-HalfPI
511  if (btFabs(pitch) == SIMD_HALF_PI)
512  {
513  if (yaw > 0)
514  yaw -= SIMD_PI;
515  else
516  yaw += SIMD_PI;
517 
518  if (roll > 0)
519  roll -= SIMD_PI;
520  else
521  roll += SIMD_PI;
522  }
523  };
524 
530  void getEulerZYX(btScalar & yaw, btScalar & pitch, btScalar & roll, unsigned int solution_number = 1) const
531  {
532  struct Euler
533  {
534  btScalar yaw;
535  btScalar pitch;
536  btScalar roll;
537  };
538 
539  Euler euler_out;
540  Euler euler_out2; //second solution
541  //get the pointer to the raw data
542 
543  // Check that pitch is not at a singularity
544  if (btFabs(m_el[2].x()) >= 1)
545  {
546  euler_out.yaw = 0;
547  euler_out2.yaw = 0;
548 
549  // From difference of angles formula
550  btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
551  if (m_el[2].x() > 0) //gimbal locked up
552  {
553  euler_out.pitch = SIMD_PI / btScalar(2.0);
554  euler_out2.pitch = SIMD_PI / btScalar(2.0);
555  euler_out.roll = euler_out.pitch + delta;
556  euler_out2.roll = euler_out.pitch + delta;
557  }
558  else // gimbal locked down
559  {
560  euler_out.pitch = -SIMD_PI / btScalar(2.0);
561  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
562  euler_out.roll = -euler_out.pitch + delta;
563  euler_out2.roll = -euler_out.pitch + delta;
564  }
565  }
566  else
567  {
568  euler_out.pitch = -btAsin(m_el[2].x());
569  euler_out2.pitch = SIMD_PI - euler_out.pitch;
570 
571  euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
572  m_el[2].z() / btCos(euler_out.pitch));
573  euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
574  m_el[2].z() / btCos(euler_out2.pitch));
575 
576  euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
577  m_el[0].x() / btCos(euler_out.pitch));
578  euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
579  m_el[0].x() / btCos(euler_out2.pitch));
580  }
581 
582  if (solution_number == 1)
583  {
584  yaw = euler_out.yaw;
585  pitch = euler_out.pitch;
586  roll = euler_out.roll;
587  }
588  else
589  {
590  yaw = euler_out2.yaw;
591  pitch = euler_out2.pitch;
592  roll = euler_out2.roll;
593  }
594  }
595 
599  btMatrix3x3 scaled(const btVector3& s) const
600  {
601 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
602  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
603 #else
604  return btMatrix3x3(
605  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
606  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
607  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
608 #endif
609  }
610 
612  btScalar determinant() const;
614  btMatrix3x3 adjoint() const;
616  btMatrix3x3 absolute() const;
618  btMatrix3x3 transpose() const;
620  btMatrix3x3 inverse() const;
621 
625  btVector3 solve33(const btVector3& b) const
626  {
627  btVector3 col1 = getColumn(0);
628  btVector3 col2 = getColumn(1);
629  btVector3 col3 = getColumn(2);
630 
631  btScalar det = btDot(col1, btCross(col2, col3));
632  if (btFabs(det) > SIMD_EPSILON)
633  {
634  det = 1.0f / det;
635  }
636  btVector3 x;
637  x[0] = det * btDot(b, btCross(col2, col3));
638  x[1] = det * btDot(col1, btCross(b, col3));
639  x[2] = det * btDot(col1, btCross(col2, b));
640  return x;
641  }
642 
643  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
644  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
645 
647  {
648  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
649  }
651  {
652  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
653  }
655  {
656  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
657  }
658 
665  SIMD_FORCE_INLINE void extractRotation(btQuaternion & q, btScalar tolerance = 1.0e-9, int maxIter = 100)
666  {
667  int iter = 0;
668  btScalar w;
669  const btMatrix3x3& A = *this;
670  for (iter = 0; iter < maxIter; iter++)
671  {
672  btMatrix3x3 R(q);
673  btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1)) + R.getColumn(2).cross(A.getColumn(2))) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn(1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
674  tolerance);
675  w = omega.norm();
676  if (w < tolerance)
677  break;
678  q = btQuaternion(btVector3((btScalar(1.0) / w) * omega), w) *
679  q;
680  q.normalize();
681  }
682  }
683 
/** @brief Diagonalize this matrix in place with Jacobi rotations.
 *  Each step zeroes the off-diagonal element of largest magnitude among
 *  [0][1], [0][2] and [1][2]. The updates write both [p][q] and [q][p] (and
 *  likewise for r), so the matrix is treated as symmetric.
 *  @param rot       Reset to identity, then accumulates every applied rotation
 *                   (rot = rot * J).
 *  @param threshold Relative tolerance: it is multiplied by the sum of the
 *                   absolute diagonal elements to get the cut-off t.
 *  @param maxSteps  Maximum number of Jacobi steps. */
693  void diagonalize(btMatrix3x3 & rot, btScalar threshold, int maxSteps)
694  {
695  rot.setIdentity();
696  for (int step = maxSteps; step > 0; step--)
697  {
698  // find off-diagonal element [p][q] with largest magnitude
 // r is the remaining index, i.e. the one that is neither p nor q
699  int p = 0;
700  int q = 1;
701  int r = 2;
702  btScalar max = btFabs(m_el[0][1]);
703  btScalar v = btFabs(m_el[0][2]);
704  if (v > max)
705  {
706  q = 2;
707  r = 1;
708  max = v;
709  }
710  v = btFabs(m_el[1][2]);
711  if (v > max)
712  {
713  p = 1;
714  q = 2;
715  r = 0;
716  max = v;
717  }
718 
719  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
720  if (max <= t)
721  {
 // Already negligible: stop without another rotation.
722  if (max <= SIMD_EPSILON * t)
723  {
724  return;
725  }
 // Below the threshold: perform this rotation, then let the loop end.
726  step = 1;
727  }
728 
729  // compute Jacobi rotation J which leads to a zero for element [p][q]
 // NOTE: the locals named cos and sin below are plain scalars holding the
 // rotation's cosine and sine; t is reused from here on for its tangent.
730  btScalar mpq = m_el[p][q];
731  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
732  btScalar theta2 = theta * theta;
733  btScalar cos;
734  btScalar sin;
735  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
736  {
737  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
738  : 1 / (theta - btSqrt(1 + theta2));
739  cos = 1 / btSqrt(1 + t * t);
740  sin = cos * t;
741  }
742  else
743  {
744  // approximation for large theta-value, i.e., a nearly diagonal matrix
745  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
746  cos = 1 - btScalar(0.5) * t * t;
747  sin = cos * t;
748  }
749 
750  // apply rotation to matrix (this = J^T * this * J)
751  m_el[p][q] = m_el[q][p] = 0;
752  m_el[p][p] -= t * mpq;
753  m_el[q][q] += t * mpq;
754  btScalar mrp = m_el[r][p];
755  btScalar mrq = m_el[r][q];
756  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
757  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
758 
759  // apply rotation to rot (rot = rot * J)
760  for (int i = 0; i < 3; i++)
761  {
762  btVector3& row = rot[i];
763  mrp = row[p];
764  mrq = row[q];
765  row[p] = cos * mrp - sin * mrq;
766  row[q] = cos * mrq + sin * mrp;
767  }
768  }
769  }
770 
778  btScalar cofac(int r1, int c1, int r2, int c2) const
779  {
780  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
781  }
782 
783  void serialize(struct btMatrix3x3Data & dataOut) const;
784 
785  void serializeFloat(struct btMatrix3x3FloatData & dataOut) const;
786 
787  void deSerialize(const struct btMatrix3x3Data& dataIn);
788 
789  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
790 
791  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
792 };
793 
796 {
797 #if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
798  __m128 rv00, rv01, rv02;
799  __m128 rv10, rv11, rv12;
800  __m128 rv20, rv21, rv22;
801  __m128 mv0, mv1, mv2;
802 
803  rv02 = m_el[0].mVec128;
804  rv12 = m_el[1].mVec128;
805  rv22 = m_el[2].mVec128;
806 
807  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
808  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
809  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
810 
811  // rv0
812  rv00 = bt_splat_ps(rv02, 0);
813  rv01 = bt_splat_ps(rv02, 1);
814  rv02 = bt_splat_ps(rv02, 2);
815 
816  rv00 = _mm_mul_ps(rv00, mv0);
817  rv01 = _mm_mul_ps(rv01, mv1);
818  rv02 = _mm_mul_ps(rv02, mv2);
819 
820  // rv1
821  rv10 = bt_splat_ps(rv12, 0);
822  rv11 = bt_splat_ps(rv12, 1);
823  rv12 = bt_splat_ps(rv12, 2);
824 
825  rv10 = _mm_mul_ps(rv10, mv0);
826  rv11 = _mm_mul_ps(rv11, mv1);
827  rv12 = _mm_mul_ps(rv12, mv2);
828 
829  // rv2
830  rv20 = bt_splat_ps(rv22, 0);
831  rv21 = bt_splat_ps(rv22, 1);
832  rv22 = bt_splat_ps(rv22, 2);
833 
834  rv20 = _mm_mul_ps(rv20, mv0);
835  rv21 = _mm_mul_ps(rv21, mv1);
836  rv22 = _mm_mul_ps(rv22, mv2);
837 
838  rv00 = _mm_add_ps(rv00, rv01);
839  rv10 = _mm_add_ps(rv10, rv11);
840  rv20 = _mm_add_ps(rv20, rv21);
841 
842  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
843  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
844  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
845 
846 #elif defined(BT_USE_NEON)
847 
848  float32x4_t rv0, rv1, rv2;
849  float32x4_t v0, v1, v2;
850  float32x4_t mv0, mv1, mv2;
851 
852  v0 = m_el[0].mVec128;
853  v1 = m_el[1].mVec128;
854  v2 = m_el[2].mVec128;
855 
856  mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
857  mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
858  mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
859 
860  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
861  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
862  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
863 
864  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
865  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
866  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
867 
868  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
869  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
870  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
871 
872  m_el[0].mVec128 = rv0;
873  m_el[1].mVec128 = rv1;
874  m_el[2].mVec128 = rv2;
875 #else
876  setValue(
877  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
878  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
879  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
880 #endif
881  return *this;
882 }
883 
886 {
887 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
888  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
889  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
890  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
891 #else
892  setValue(
893  m_el[0][0] + m.m_el[0][0],
894  m_el[0][1] + m.m_el[0][1],
895  m_el[0][2] + m.m_el[0][2],
896  m_el[1][0] + m.m_el[1][0],
897  m_el[1][1] + m.m_el[1][1],
898  m_el[1][2] + m.m_el[1][2],
899  m_el[2][0] + m.m_el[2][0],
900  m_el[2][1] + m.m_el[2][1],
901  m_el[2][2] + m.m_el[2][2]);
902 #endif
903  return *this;
904 }
905 
907 operator*(const btMatrix3x3& m, const btScalar& k)
908 {
909 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
910  __m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
911  return btMatrix3x3(
912  _mm_mul_ps(m[0].mVec128, vk),
913  _mm_mul_ps(m[1].mVec128, vk),
914  _mm_mul_ps(m[2].mVec128, vk));
915 #elif defined(BT_USE_NEON)
916  return btMatrix3x3(
917  vmulq_n_f32(m[0].mVec128, k),
918  vmulq_n_f32(m[1].mVec128, k),
919  vmulq_n_f32(m[2].mVec128, k));
920 #else
921  return btMatrix3x3(
922  m[0].x() * k, m[0].y() * k, m[0].z() * k,
923  m[1].x() * k, m[1].y() * k, m[1].z() * k,
924  m[2].x() * k, m[2].y() * k, m[2].z() * k);
925 #endif
926 }
927 
929 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
930 {
931 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
932  return btMatrix3x3(
933  m1[0].mVec128 + m2[0].mVec128,
934  m1[1].mVec128 + m2[1].mVec128,
935  m1[2].mVec128 + m2[2].mVec128);
936 #else
937  return btMatrix3x3(
938  m1[0][0] + m2[0][0],
939  m1[0][1] + m2[0][1],
940  m1[0][2] + m2[0][2],
941 
942  m1[1][0] + m2[1][0],
943  m1[1][1] + m2[1][1],
944  m1[1][2] + m2[1][2],
945 
946  m1[2][0] + m2[2][0],
947  m1[2][1] + m2[2][1],
948  m1[2][2] + m2[2][2]);
949 #endif
950 }
951 
953 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
954 {
955 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
956  return btMatrix3x3(
957  m1[0].mVec128 - m2[0].mVec128,
958  m1[1].mVec128 - m2[1].mVec128,
959  m1[2].mVec128 - m2[2].mVec128);
960 #else
961  return btMatrix3x3(
962  m1[0][0] - m2[0][0],
963  m1[0][1] - m2[0][1],
964  m1[0][2] - m2[0][2],
965 
966  m1[1][0] - m2[1][0],
967  m1[1][1] - m2[1][1],
968  m1[1][2] - m2[1][2],
969 
970  m1[2][0] - m2[2][0],
971  m1[2][1] - m2[2][1],
972  m1[2][2] - m2[2][2]);
973 #endif
974 }
975 
978 {
979 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
980  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
981  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
982  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
983 #else
984  setValue(
985  m_el[0][0] - m.m_el[0][0],
986  m_el[0][1] - m.m_el[0][1],
987  m_el[0][2] - m.m_el[0][2],
988  m_el[1][0] - m.m_el[1][0],
989  m_el[1][1] - m.m_el[1][1],
990  m_el[1][2] - m.m_el[1][2],
991  m_el[2][0] - m.m_el[2][0],
992  m_el[2][1] - m.m_el[2][1],
993  m_el[2][2] - m.m_el[2][2]);
994 #endif
995  return *this;
996 }
997 
1000 {
1001  return btTriple((*this)[0], (*this)[1], (*this)[2]);
1002 }
1003 
1006 {
1007 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1008  return btMatrix3x3(
1009  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
1010  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
1011  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
1012 #elif defined(BT_USE_NEON)
1013  return btMatrix3x3(
1014  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
1015  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
1016  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
1017 #else
1018  return btMatrix3x3(
1019  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
1020  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
1021  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
1022 #endif
1023 }
1024 
1027 {
1028 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1029  __m128 v0 = m_el[0].mVec128;
1030  __m128 v1 = m_el[1].mVec128;
1031  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
1032  __m128 vT;
1033 
1034  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
1035 
1036  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
1037  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
1038 
1039  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3)); // y0 y1 y2 0
1040  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3)); // x0 x1 x2 0
1041  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
1042 
1043  return btMatrix3x3(v0, v1, v2);
1044 #elif defined(BT_USE_NEON)
1045  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1046  static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
1047  float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1048  float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f)); // {x2 0 }, {y2 0}
1049  float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
1050  float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
1051  float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
1052  float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q); // z0 z1 z2 0
1053  return btMatrix3x3(v0, v1, v2);
1054 #else
1055  return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
1056  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1057  m_el[0].z(), m_el[1].z(), m_el[2].z());
1058 #endif
1059 }
1060 
1063 {
1064  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1065  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1066  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1067 }
1068 
1071 {
1072  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1073  btScalar det = (*this)[0].dot(co);
1074  //btFullAssert(det != btScalar(0.0));
1075  btAssert(det != btScalar(0.0));
1076  btScalar s = btScalar(1.0) / det;
1077  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1078  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1079  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1080 }
1081 
1084 {
1085 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1086  // zeros w
1087  // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1088  __m128 row = m_el[0].mVec128;
1089  __m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
1090  __m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
1091  __m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
1092  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1093  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1094  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1095  row = m_el[1].mVec128;
1096  r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1097  r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1098  r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1099  row = m_el[2].mVec128;
1100  r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1101  r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1102  r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1103  return btMatrix3x3(r0, r1, r2);
1104 
1105 #elif defined BT_USE_NEON
1106  // zeros w
1107  static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
1108  float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
1109  float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
1110  float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
1111  float32x4_t row = m_el[0].mVec128;
1112  float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
1113  float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
1114  float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
1115  row = m_el[1].mVec128;
1116  r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
1117  r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
1118  r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
1119  row = m_el[2].mVec128;
1120  r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
1121  r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
1122  r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
1123  return btMatrix3x3(r0, r1, r2);
1124 #else
1125  return btMatrix3x3(
1126  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1127  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1128  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1129  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1130  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1131  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1132  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1133  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1134  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1135 #endif
1136 }
1137 
1140 {
      // Body of btMatrix3x3::timesTranspose(const btMatrix3x3& m) const (signature line is not part of this listing).
      // Computes (*this) * transpose(m): element (i, j) of the result is row i of this matrix dotted with row j of m.
      // The SSE, NEON and scalar branches below evaluate the same 3x3 product.
1141 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1142  __m128 a0 = m_el[0].mVec128;
1143  __m128 a1 = m_el[1].mVec128;
1144  __m128 a2 = m_el[2].mVec128;
1145 
1146  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1147  __m128 mx = mT[0].mVec128;
1148  __m128 my = mT[1].mVec128;
1149  __m128 mz = mT[2].mVec128;
1150 
      // Shuffle masks 0x00 / 0x55 / 0xaa broadcast lane 0 (x) / lane 1 (y) / lane 2 (z) of a row to all four lanes,
      // so each result row is accumulated as mx * a.x + my * a.y + mz * a.z.
1151  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1152  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1153  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1154  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1155  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1156  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1157  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1158  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1159  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1160  return btMatrix3x3(r0, r1, r2);
1161 
1162 #elif defined BT_USE_NEON
1163  float32x4_t a0 = m_el[0].mVec128;
1164  float32x4_t a1 = m_el[1].mVec128;
1165  float32x4_t a2 = m_el[2].mVec128;
1166 
1167  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1168  float32x4_t mx = mT[0].mVec128;
1169  float32x4_t my = mT[1].mVec128;
1170  float32x4_t mz = mT[2].mVec128;
1171 
      // Same accumulation as the SSE branch: low half lanes 0/1 are x/y, high half lane 0 is z.
1172  float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
1173  float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
1174  float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
1175  r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
1176  r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
1177  r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
1178  r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
1179  r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
1180  r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
1181  return btMatrix3x3(r0, r1, r2);
1182 
1183 #else
      // Scalar fallback: nine row-by-row dot products, passed in row-major order.
1184  return btMatrix3x3(
1185  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1186  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1187  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1188 #endif
1189 }
1190 
1192 operator*(const btMatrix3x3& m, const btVector3& v)
1193 {
      // Matrix times column vector: component i of the result is the dot product of row m[i] with v.
1194 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
      // NOTE(review): dot3 is defined in btVector3.h; presumably it returns (v.m[0], v.m[1], v.m[2]) in one SIMD pass - confirm there.
1195  return v.dot3(m[0], m[1], m[2]);
1196 #else
1197  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1198 #endif
1199 }
1200 
1202 operator*(const btVector3& v, const btMatrix3x3& m)
1203 {
      // Row vector times matrix: the SIMD branches form v.x * m[0] + v.y * m[1] + v.z * m[2]
      // (a linear combination of the rows of m weighted by the components of v).
1204 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1205 
1206  const __m128 vv = v.mVec128;
1207 
      // NOTE(review): bt_splat_ps(vv, i) presumably broadcasts lane i of vv to all lanes - defined outside this file.
1208  __m128 c0 = bt_splat_ps(vv, 0);
1209  __m128 c1 = bt_splat_ps(vv, 1);
1210  __m128 c2 = bt_splat_ps(vv, 2);
1211 
      // NOTE(review): btvFFF0fMask presumably keeps x, y, z and clears the w lane of each row - confirm in btVector3.h.
1212  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
1213  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
1214  c0 = _mm_add_ps(c0, c1);
1215  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));
1216 
1217  return btVector3(_mm_add_ps(c0, c2));
1218 #elif defined(BT_USE_NEON)
1219  const float32x4_t vv = v.mVec128;
1220  const float32x2_t vlo = vget_low_f32(vv);
1221  const float32x2_t vhi = vget_high_f32(vv);
1222 
1223  float32x4_t c0, c1, c2;
1224 
      // Mask each row of m first (presumably clearing w, as in the SSE branch), then scale by v.x, v.y, v.z.
1225  c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1226  c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1227  c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1228 
1229  c0 = vmulq_lane_f32(c0, vlo, 0);
1230  c1 = vmulq_lane_f32(c1, vlo, 1);
1231  c2 = vmulq_lane_f32(c2, vhi, 0);
1232  c0 = vaddq_f32(c0, c1);
1233  c0 = vaddq_f32(c0, c2);
1234 
1235  return btVector3(c0);
1236 #else
      // NOTE(review): tdotx/tdoty/tdotz are defined earlier in this header; presumably each dots v with one column of m.
1237  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1238 #endif
1239 }
1240 
1242 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1243 {
      // Matrix product m1 * m2. In the SIMD branches, result row i is
      // m1[i].x * m2[0] + m1[i].y * m2[1] + m1[i].z * m2[2].
1244 #if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1245 
1246  __m128 m10 = m1[0].mVec128;
1247  __m128 m11 = m1[1].mVec128;
1248  __m128 m12 = m1[2].mVec128;
1249 
      // m2v holds one masked row of m2 at a time (row 0 here, then row 1, then row 2).
1250  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1251 
1252  __m128 c0 = bt_splat_ps(m10, 0);
1253  __m128 c1 = bt_splat_ps(m11, 0);
1254  __m128 c2 = bt_splat_ps(m12, 0);
1255 
1256  c0 = _mm_mul_ps(c0, m2v);
1257  c1 = _mm_mul_ps(c1, m2v);
1258  c2 = _mm_mul_ps(c2, m2v);
1259 
1260  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1261 
1262  __m128 c0_1 = bt_splat_ps(m10, 1);
1263  __m128 c1_1 = bt_splat_ps(m11, 1);
1264  __m128 c2_1 = bt_splat_ps(m12, 1);
1265 
1266  c0_1 = _mm_mul_ps(c0_1, m2v);
1267  c1_1 = _mm_mul_ps(c1_1, m2v);
1268  c2_1 = _mm_mul_ps(c2_1, m2v);
1269 
1270  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1271 
1272  c0 = _mm_add_ps(c0, c0_1);
1273  c1 = _mm_add_ps(c1, c1_1);
1274  c2 = _mm_add_ps(c2, c2_1);
1275 
      // m10/m11/m12 are reused as scratch from here on: they now hold the lane-2 splat of each m1 row, not the row itself.
1276  m10 = bt_splat_ps(m10, 2);
1277  m11 = bt_splat_ps(m11, 2);
1278  m12 = bt_splat_ps(m12, 2);
1279 
1280  m10 = _mm_mul_ps(m10, m2v);
1281  m11 = _mm_mul_ps(m11, m2v);
1282  m12 = _mm_mul_ps(m12, m2v);
1283 
1284  c0 = _mm_add_ps(c0, m10);
1285  c1 = _mm_add_ps(c1, m11);
1286  c2 = _mm_add_ps(c2, m12);
1287 
1288  return btMatrix3x3(c0, c1, c2);
1289 
1290 #elif defined(BT_USE_NEON)
1291 
1292  float32x4_t rv0, rv1, rv2;
1293  float32x4_t v0, v1, v2;
1294  float32x4_t mv0, mv1, mv2;
1295 
1296  v0 = m1[0].mVec128;
1297  v1 = m1[1].mVec128;
1298  v2 = m1[2].mVec128;
1299 
      // Masked rows of m2 (NOTE(review): btvFFF0Mask presumably clears the w lane - confirm in btVector3.h).
1300  mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1301  mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1302  mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1303 
      // x contribution of each m1 row ...
1304  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1305  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1306  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1307 
      // ... plus the y contribution ...
1308  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1309  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1310  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1311 
      // ... plus the z contribution.
1312  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1313  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1314  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1315 
1316  return btMatrix3x3(rv0, rv1, rv2);
1317 
1318 #else
      // Scalar fallback: each result row is built from m2.tdotx/tdoty/tdotz applied to the matching row of m1.
1319  return btMatrix3x3(
1320  m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
1321  m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
1322  m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
1323 #endif
1324 }
1325 
1326 /*
1327 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1328 return btMatrix3x3(
1329 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1330 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1331 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1332 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1333 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1334 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1335 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1336 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1337 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1338 }
1339 */
1340 
1344 {
      // Body of bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2) (signature line is not part of this listing).
      // Returns true only when all nine x/y/z elements of the two matrices compare equal.
1345 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
1346 
1347  __m128 c0, c1, c2;
1348 
      // Lane-wise equality per row, then AND the three row masks together.
1349  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1350  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1351  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1352 
1353  c0 = _mm_and_ps(c0, c1);
1354  c0 = _mm_and_ps(c0, c2);
1355 
      // movemask packs one bit per lane; only bits 0..2 (x, y, z) are tested, so the w lane is ignored.
1356  int m = _mm_movemask_ps((__m128)c0);
1357  return (0x7 == (m & 0x7));
1358 
1359 #else
      // Scalar fallback: explicit element-by-element comparison.
1360  return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1361  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1362  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
1363 #endif
1364 }
1365 
1368 {
      // NOTE(review): the struct header and its member line (listing lines 1367 and 1369) are absent from this extract.
      // The symbol index at the end of the file lists the member as `btVector3FloatData m_el[3]` ("for serialization").
1370 };
1371 
1374 {
      // NOTE(review): the struct header and its member line (listing lines 1373 and 1375) are absent from this extract.
      // The symbol index at the end of the file lists the member as `btVector3DoubleData m_el[3]` ("for serialization").
1376 };
1377 
1379 {
      // Body of btMatrix3x3::serialize(btMatrix3x3Data& dataOut) const (signature line is not part of this listing).
      // Writes each of the three matrix rows into the matching dataOut.m_el[i] slot via the row's own serialize().
1380  for (int i = 0; i < 3; i++)
1381  m_el[i].serialize(dataOut.m_el[i]);
1382 }
1383 
1385 {
      // Body of btMatrix3x3::serializeFloat(btMatrix3x3FloatData& dataOut) const (signature line is not part of this listing).
      // Writes each of the three matrix rows into dataOut.m_el[i] via the row's serializeFloat().
1386  for (int i = 0; i < 3; i++)
1387  m_el[i].serializeFloat(dataOut.m_el[i]);
1388 }
1389 
1391 {
      // Body of btMatrix3x3::deSerialize(const btMatrix3x3Data& dataIn) (signature line is not part of this listing).
      // Restores each of the three matrix rows from the matching dataIn.m_el[i] slot via the row's deSerialize().
1392  for (int i = 0; i < 3; i++)
1393  m_el[i].deSerialize(dataIn.m_el[i]);
1394 }
1395 
1397 {
      // Body of btMatrix3x3::deSerializeFloat(const btMatrix3x3FloatData& dataIn) (signature line is not part of this listing).
      // Restores each of the three matrix rows from dataIn.m_el[i] via the row's deSerializeFloat().
1398  for (int i = 0; i < 3; i++)
1399  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1400 }
1401 
1403 {
      // Body of btMatrix3x3::deSerializeDouble(const btMatrix3x3DoubleData& dataIn) (signature line is not part of this listing).
      // Restores each of the three matrix rows from dataIn.m_el[i] via the row's deSerializeDouble().
1404  for (int i = 0; i < 3; i++)
1405  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1406 }
1407 
1408 #endif //BT_MATRIX3x3_H
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:113
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1396
#define SIMD_EPSILON
Definition: btScalar.h:523
for serialization
Definition: btMatrix3x3.h:1367
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1378
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1375
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices. It tests that all elements are equal.
Definition: btMatrix3x3.h:1343
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:650
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:646
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1139
btScalar norm() const
Return the norm (length) of the vector.
Definition: btVector3.h:263
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:640
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:625
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:206
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1384
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:654
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:720
btScalar btSin(btScalar x)
Definition: btScalar.h:479
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:140
btScalar btSqrt(btScalar y)
Definition: btScalar.h:446
#define btAssert(x)
Definition: btScalar.h:133
unsigned int uint32_t
#define SIMD_FORCE_INLINE
Definition: btScalar.h:83
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:778
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:929
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:121
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:909
#define btFullAssert(x)
Definition: btScalar.h:136
#define SIMD_HALF_PI
Definition: btScalar.h:508
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:50
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:1026
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:59
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:918
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:133
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1390
#define SIMD_PI
Definition: btScalar.h:506
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1083
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:693
const btScalar & x() const
Return the x value.
Definition: btVector3.h:575
#define btMatrix3x3Data
Definition: btMatrix3x3.h:41
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
Definition: btQuaternion.h:385
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1062
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1402
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:380
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:229
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:795
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:498
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:149
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:907
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:599
const btScalar & y() const
Return the y value.
Definition: btVector3.h:577
const btScalar & z() const
Return the z value.
Definition: btVector3.h:579
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:530
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:195
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:68
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:117
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:503
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:80
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:84
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Subtracts the target matrix on the right.
Definition: btMatrix3x3.h:977
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:280
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds the target matrix on the right.
Definition: btMatrix3x3.h:885
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:953
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:999
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:1005
btMatrix3x3()
No initialization constructor.
Definition: btMatrix3x3.h:54
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:344
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:156
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:364
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:148
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:46
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:115
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:888
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:113
for serialization
Definition: btMatrix3x3.h:1373
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:49
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:179
btScalar btAsin(btScalar x)
Definition: btScalar.h:489
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:890
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1070
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:924
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:397
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:314
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:327
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:294
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
Definition: btMatrix3x3.h:665
btScalar btCos(btScalar x)
Definition: btScalar.h:478
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1369
btScalar btFabs(btScalar x)
Definition: btScalar.h:477
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:294