16 #if defined(_WIN32) || defined(__i386__)
17 #define BT_USE_SSE_IN_API
30 #if defined(__CELLOS_LV2__) && defined(__SPU__)
31 #include <spu_intrinsics.h>
// Three-element dot product of two SPU float quadwords.
// The element-wise product is accumulated with the products of both operands
// rotated left by 4 bytes and by 8 bytes, so slot 0 of the returned vector is
// vec0[0]*vec1[0] + vec0[1]*vec1[1] + vec0[2]*vec1[2].
// The other slots hold the same kind of sum over rotated lanes (and therefore
// include the fourth element); only slot 0 is the xyz dot product.
32 static inline vec_float4 vec_dot3(vec_float4 vec0, vec_float4 vec1)
// Slot-wise products: result[k] = vec0[k] * vec1[k].
35 result = spu_mul(vec0, vec1);
// Add the products of the next lane (operands rotated by one 4-byte float).
36 result = spu_madd(spu_rlqwbyte(vec0, 4), spu_rlqwbyte(vec1, 4), result);
// Add the products of the lane after that (rotation by 8 bytes) and return.
37 return spu_madd(spu_rlqwbyte(vec0, 8), spu_rlqwbyte(vec1, 8), result);
// Finds the point with the largest dot product against the query direction
// and returns it multiplied by localScaling. Two implementations are shown:
// an SPU intrinsic scan and a path that delegates to btVector3::maxDot.
// The query direction is multiplied by the local scaling before the search.
72 btVector3 vec = localDirOrg * localScaling;
74 #if defined(__CELLOS_LV2__) && defined(__SPU__)
// Running best dot product and its index live in slot 0 of these vectors.
// They start at -FLT_MAX and at the sentinel index -999.
78 vec_float4 v_distMax = {-FLT_MAX, 0, 0, 0};
79 vec_int4 v_idxMax = {-999, 0, 0, 0};
81 int numverts = numPoints;
// Main loop: four points per iteration. The bound is numverts - 4 (strict),
// so between one and four trailing points are always left for the scalar
// loop further down.
83 for (; v < (int)numverts - 4; v += 4)
// Dot product of each of the four points with the direction (slot 0).
85 vec_float4 p0 = vec_dot3(points[v].get128(), localDir.get128());
86 vec_float4 p1 = vec_dot3(points[v + 1].get128(), localDir.get128());
87 vec_float4 p2 = vec_dot3(points[v + 2].get128(), localDir.get128());
88 vec_float4 p3 = vec_dot3(points[v + 3].get128(), localDir.get128());
// Matching point indices, carried in slot 0 so they can be selected with the
// same masks as the dot products.
89 const vec_int4 i0 = {v, 0, 0, 0};
90 const vec_int4 i1 = {v + 1, 0, 0, 0};
91 const vec_int4 i2 = {v + 2, 0, 0, 0};
92 const vec_int4 i3 = {v + 3, 0, 0, 0};
// Pairwise tournament: winner of (p0, p1). The comparison is strict, so on
// equal values the second operand (p1 / i1) is kept.
93 vec_uint4 retGt01 = spu_cmpgt(p0, p1);
94 vec_float4 pmax01 = spu_sel(p1, p0, retGt01);
95 vec_int4 imax01 = spu_sel(i1, i0, retGt01);
// Winner of (p2, p3), same rule.
96 vec_uint4 retGt23 = spu_cmpgt(p2, p3);
97 vec_float4 pmax23 = spu_sel(p3, p2, retGt23);
98 vec_int4 imax23 = spu_sel(i3, i2, retGt23);
// Winner of the two pair winners.
99 vec_uint4 retGt0123 = spu_cmpgt(pmax01, pmax23);
100 vec_float4 pmax0123 = spu_sel(pmax23, pmax01, retGt0123);
101 vec_int4 imax0123 = spu_sel(imax23, imax01, retGt0123);
// Keep the running maximum only when it is strictly greater than the new
// candidate; otherwise the candidate and its index replace it.
102 vec_uint4 retGtMax = spu_cmpgt(v_distMax, pmax0123);
103 v_distMax = spu_sel(pmax0123, v_distMax, retGtMax);
104 v_idxMax = spu_sel(imax0123, v_idxMax, retGtMax);
// Tail loop: remaining points one at a time, same update rule as above.
106 for (; v < (int)numverts; v++)
108 vec_float4 p = vec_dot3(points[v].get128(), localDir.get128());
109 const vec_int4 i = {v, 0, 0, 0};
110 vec_uint4 retGtMax = spu_cmpgt(v_distMax, p);
111 v_distMax = spu_sel(p, v_distMax, retGtMax);
112 v_idxMax = spu_sel(i, v_idxMax, retGtMax);
// The winning index is read back from slot 0.
// NOTE(review): if numPoints is 0 neither loop executes and the index is
// still the -999 sentinel, so points[ptIndex] below would be out of range.
// Confirm that callers never pass an empty point set.
114 int ptIndex = spu_extract(v_idxMax, 0);
115 const btVector3& supVec = points[ptIndex] * localScaling;
// Non-SPU path: the search is delegated to maxDot on the scaled direction,
// which yields the index used below. The third argument is presumably an
// output receiving the best dot product; verify against btVector3.
120 long ptIndex = vec.
maxDot(points, numPoints, maxDot);
// Selected point multiplied by the local scaling.
126 btVector3 supVec = points[ptIndex] * localScaling;
// Builds a vector whose components have the magnitude of halfExtents and a
// sign derived from localDir. Vector-unit versions are used on Apple builds
// with SSE or NEON; the remaining lines are the scalar version.
144 #if defined(__APPLE__) && (defined(BT_USE_SSE) || defined(BT_USE_NEON))
145 #if defined(BT_USE_SSE)
// -0.0f has only the sign bit set, so the AND isolates the sign bits of
// localDir and the XOR flips the sign of halfExtents wherever that bit is set.
146 return btVector3(_mm_xor_ps(_mm_and_ps(localDir.mVec128, (__m128){-0.0f, -0.0f, -0.0f, -0.0f}), halfExtents.mVec128));
147 #elif defined(BT_USE_NEON)
// Same sign-bit trick on integer lanes: mask with 0x80000000, then XOR.
148 return btVector3((float32x4_t)(((uint32x4_t)localDir.mVec128 & (uint32x4_t){0x80000000, 0x80000000, 0x80000000, 0x80000000}) ^ (uint32x4_t)halfExtents.mVec128));
150 #error unknown vector arch
// Scalar version, y and z components: btFsels is given the direction
// component and the positive and negative half extent. Presumably it returns
// the second argument for a non-negative first argument and the third
// otherwise; confirm against the btFsels definition.
154 btFsels(localDir.
y(), halfExtents.
y(), -halfExtents.
y()),
155 btFsels(localDir.
z(), halfExtents.
z(), -halfExtents.
z()));
// One dot3 call over the three triangle vertices; presumably each component of
// `dots` is dir dotted with the corresponding vertex. Verify against
// btVector3::dot3.
163 btVector3 dots = dir.
dot3(vertices[0], vertices[1], vertices[2]);
// Cylinder support fragment. The up axis comes from the shape; XX/YY/ZZ are
// component indices, with YY used below as the index of the height component.
174 int cylinderUpAxis = cylShape->
getUpAxis();
// Initial index assignment; the switch that follows depends on the up axis
// (its cases are not part of this excerpt).
175 int XX(1), YY(0), ZZ(2);
177 switch (cylinderUpAxis)
// Half height is the half extent along the up axis.
206 btScalar halfHeight = halfExtents[cylinderUpAxis];
// Height component of the result: -halfHeight when the direction component is
// negative, +halfHeight otherwise (including zero).
216 tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
// Same assignment again at a second location in the original code.
223 tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
// Capsule support fragment: two candidate positions are evaluated, one at
// +halfHeight and one at -halfHeight along the capsule's up axis.
234 int capsuleUpAxis = capsuleShape->getUpAxis();
// Candidate on the positive side of the up axis.
255 pos[capsuleUpAxis] = halfHeight;
// Dot product of the direction with the candidate vertex vtx.
258 newDot = vec.
dot(vtx);
// Candidate on the negative side of the up axis.
268 pos[capsuleUpAxis] = -halfHeight;
// Dot product for the second candidate.
271 newDot = vec.
dot(vtx);
// Axis-aligned bounding box fragments for several shape kinds. Each fragment
// writes aabbMin / aabbMax.
// Cube of half-width `margin` around `center`.
384 btVector3 extent(margin, margin, margin);
385 aabbMin = center - extent;
386 aabbMax = center + extent;
// Half extents are enlarged by the margin on every axis first.
396 halfExtents +=
btVector3(margin, margin, margin);
// Per-axis extent from dot3 of the half extents with the three abs_b entries.
// Presumably abs_b is the component-wise absolute value of the transform
// basis; confirm where abs_b is built.
399 btVector3 extent = halfExtents.
dot3(abs_b[0], abs_b[1], abs_b[2]);
401 aabbMin = center - extent;
402 aabbMax = center + extent;
// Per-axis bounds: tmp[i] plus the margin for the maximum and tmp[i] minus the
// margin for the minimum. How tmp is computed is not shown in these lines.
409 for (
int i = 0; i < 3; i++)
417 aabbMax[i] = tmp[i] + margin;
420 aabbMin[i] = tmp[i] - margin;
// Capsule case: m_upAxis is a local variable here despite the m_ prefix.
428 int m_upAxis = capsuleShape->
getUpAxis();
// Same dot3 projection as in the earlier fragment, then a box around center.
432 btVector3 extent = halfExtents.
dot3(abs_b[0], abs_b[1], abs_b[2]);
433 aabbMin = center - extent;
434 aabbMax = center + extent;
// Delegates to this object's own getAabb for transform t.
447 this->
getAabb(t, aabbMin, aabbMax);