40 #if (DBVT_MERGE_IMPL == DBVT_IMPL_SSE)
55 return (edges.
x() * edges.
y() * edges.
z() +
56 edges.
x() + edges.
y() + edges.
z());
68 maxdepth =
btMax(maxdepth, depth);
155 }
while (!root->
isleaf());
173 }
while (0 != (prev = node->
parent));
190 if (leaf == pdbvt->
m_root)
216 return (prev ? prev : pdbvt->
m_root);
266 while (begin != end &&
leftOfAxis(leaves[begin], org, axis))
276 while (begin != end && !
leftOfAxis(leaves[end - 1], org, axis))
289 leaves[begin] = leaves[end];
301 #if DBVT_MERGE_IMPL == DBVT_IMPL_SSE
305 volume = leaves[0]->
volume;
309 for (
int i = 1, ni = count; i < ni; ++i)
311 Merge(volume, leaves[i]->volume, volume);
324 int minidx[2] = {-1, -1};
325 for (
int i = 0; i < count; ++i)
327 for (
int j = i + 1; j < count; ++j)
338 btDbvtNode* n[] = {leaves[minidx[0]], leaves[minidx[1]]};
344 leaves[minidx[0]] = p;
345 leaves[minidx[1]] = leaves[count - 1];
362 if (count > bu_treshold)
368 int bestmidp = count;
369 int splitcount[3][2] = {{0, 0}, {0, 0}, {0, 0}};
371 for (i = 0; i < count; ++i)
374 for (
int j = 0; j < 3; ++j)
376 ++splitcount[j][
btDot(x, axis[j]) > 0 ? 1 : 0];
379 for (i = 0; i < 3; ++i)
381 if ((splitcount[i][0] > 0) && (splitcount[i][1] > 0))
383 const int midp = (int)
btFabs(
btScalar(splitcount[i][0] - splitcount[i][1]));
393 partition =
split(leaves, count, org, axis[bestaxis]);
394 btAssert(partition != 0 && partition != count);
398 partition = count / 2 + 1;
401 node->
childs[0] =
topdown(pdbvt, &leaves[0], partition, bu_treshold);
402 node->
childs[1] =
topdown(pdbvt, &leaves[partition], count - partition, bu_treshold);
450 while(n&&(count--)) n=n->
parent;
516 if (
m_root && (passes > 0))
525 bit = (bit + 1) & (
sizeof(
unsigned) * 8 - 1);
550 for (
int i = 0; (i < lookahead) && root->
parent; ++i)
624 for (
int i = 0; i < nodes.
nodes.
size(); ++i)
653 const int i = stack.
size() - 1;
670 }
while (stack.
size() > 0);
706 #if DBVT_ENABLE_BENCHMARK
710 #include "LinearMath/btQuickProf.h"
747 struct btDbvtBenchmark
751 NilPolicy() : m_pcount(0), m_depth(-
SIMD_INFINITY), m_checksort(true) {}
753 void Process(
const btDbvtNode*) { ++m_pcount; }
759 if (depth >= m_depth)
762 printf(
"wrong depth: %f (should be >= %f)\r\n", depth, m_depth);
782 static int sortfnc(
const Node& a,
const Node& b)
784 if (a.depth < b.depth)
return (+1);
785 if (a.depth > b.depth)
return (-1);
803 static int sortfnc(
const Node& a,
const Node& b)
805 if (a.depth < b.depth)
return (+1);
806 if (a.depth > b.depth)
return (-1);
814 return (rand() / (
btScalar)RAND_MAX);
818 return (
btVector3(RandUnit(), RandUnit(), RandUnit()));
822 return (RandVector3() * cs -
btVector3(cs, cs, cs) / 2);
838 for (
int i = 0; i < leaves; ++i)
840 dbvt.
insert(RandVolume(cs, eb, es), 0);
847 static const btScalar cfgVolumeCenterScale = 100;
848 static const btScalar cfgVolumeExentsBase = 1;
849 static const btScalar cfgVolumeExentsScale = 4;
850 static const int cfgLeaves = 8192;
851 static const bool cfgEnable =
true;
854 bool cfgBenchmark1_Enable = cfgEnable;
855 static const int cfgBenchmark1_Iterations = 8;
856 static const int cfgBenchmark1_Reference = 3499;
858 bool cfgBenchmark2_Enable = cfgEnable;
859 static const int cfgBenchmark2_Iterations = 4;
860 static const int cfgBenchmark2_Reference = 1945;
862 bool cfgBenchmark3_Enable = cfgEnable;
863 static const int cfgBenchmark3_Iterations = 512;
864 static const int cfgBenchmark3_Reference = 5485;
866 bool cfgBenchmark4_Enable = cfgEnable;
867 static const int cfgBenchmark4_Iterations = 512;
868 static const int cfgBenchmark4_Reference = 2814;
870 bool cfgBenchmark5_Enable = cfgEnable;
871 static const int cfgBenchmark5_Iterations = 512;
872 static const btScalar cfgBenchmark5_OffsetScale = 2;
873 static const int cfgBenchmark5_Reference = 7379;
875 bool cfgBenchmark6_Enable = cfgEnable;
876 static const int cfgBenchmark6_Iterations = 512;
877 static const btScalar cfgBenchmark6_OffsetScale = 2;
878 static const int cfgBenchmark6_Reference = 7270;
880 bool cfgBenchmark7_Enable = cfgEnable;
881 static const int cfgBenchmark7_Passes = 32;
882 static const int cfgBenchmark7_Iterations = 65536;
883 static const int cfgBenchmark7_Reference = 6307;
885 bool cfgBenchmark8_Enable = cfgEnable;
886 static const int cfgBenchmark8_Passes = 32;
887 static const int cfgBenchmark8_Iterations = 65536;
888 static const int cfgBenchmark8_Reference = 2105;
890 bool cfgBenchmark9_Enable = cfgEnable;
891 static const int cfgBenchmark9_Passes = 32;
892 static const int cfgBenchmark9_Iterations = 65536;
893 static const int cfgBenchmark9_Reference = 1879;
895 bool cfgBenchmark10_Enable = cfgEnable;
896 static const btScalar cfgBenchmark10_Scale = cfgVolumeCenterScale / 10000;
897 static const int cfgBenchmark10_Passes = 32;
898 static const int cfgBenchmark10_Iterations = 65536;
899 static const int cfgBenchmark10_Reference = 1244;
901 bool cfgBenchmark11_Enable = cfgEnable;
902 static const int cfgBenchmark11_Passes = 64;
903 static const int cfgBenchmark11_Iterations = 65536;
904 static const int cfgBenchmark11_Reference = 2510;
906 bool cfgBenchmark12_Enable = cfgEnable;
907 static const int cfgBenchmark12_Iterations = 32;
908 static const int cfgBenchmark12_Reference = 3677;
910 bool cfgBenchmark13_Enable = cfgEnable;
911 static const int cfgBenchmark13_Iterations = 1024;
912 static const int cfgBenchmark13_Reference = 2231;
914 bool cfgBenchmark14_Enable = cfgEnable;
915 static const int cfgBenchmark14_Iterations = 8192;
916 static const int cfgBenchmark14_Reference = 3500;
918 bool cfgBenchmark15_Enable = cfgEnable;
919 static const int cfgBenchmark15_Iterations = 8192;
920 static const int cfgBenchmark15_Reference = 1151;
922 bool cfgBenchmark16_Enable = cfgEnable;
923 static const int cfgBenchmark16_BatchCount = 256;
924 static const int cfgBenchmark16_Passes = 16384;
925 static const int cfgBenchmark16_Reference = 5138;
927 bool cfgBenchmark17_Enable = cfgEnable;
928 static const int cfgBenchmark17_Iterations = 4;
929 static const int cfgBenchmark17_Reference = 3390;
932 printf(
"Benchmarking dbvt...\r\n");
933 printf(
"\tWorld scale: %f\r\n", cfgVolumeCenterScale);
934 printf(
"\tExtents base: %f\r\n", cfgVolumeExentsBase);
935 printf(
"\tExtents range: %f\r\n", cfgVolumeExentsScale);
936 printf(
"\tLeaves: %u\r\n", cfgLeaves);
937 printf(
"\tsizeof(btDbvtVolume): %u bytes\r\n",
sizeof(
btDbvtVolume));
938 printf(
"\tsizeof(btDbvtNode): %u bytes\r\n",
sizeof(
btDbvtNode));
939 if (cfgBenchmark1_Enable)
944 volumes.
resize(cfgLeaves);
945 results.
resize(cfgLeaves);
946 for (
int i = 0; i < cfgLeaves; ++i)
948 volumes[i] = btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
950 printf(
"[1] btDbvtVolume intersections: ");
952 for (
int i = 0; i < cfgBenchmark1_Iterations; ++i)
954 for (
int j = 0; j < cfgLeaves; ++j)
956 for (
int k = 0; k < cfgLeaves; ++k)
958 results[k] =
Intersect(volumes[j], volumes[k]);
963 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark1_Reference) * 100 / time);
965 if (cfgBenchmark2_Enable)
970 volumes.
resize(cfgLeaves);
971 results.
resize(cfgLeaves);
972 for (
int i = 0; i < cfgLeaves; ++i)
974 volumes[i] = btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
976 printf(
"[2] btDbvtVolume merges: ");
978 for (
int i = 0; i < cfgBenchmark2_Iterations; ++i)
980 for (
int j = 0; j < cfgLeaves; ++j)
982 for (
int k = 0; k < cfgLeaves; ++k)
984 Merge(volumes[j], volumes[k], results[k]);
989 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark2_Reference) * 100 / time);
991 if (cfgBenchmark3_Enable)
995 btDbvtBenchmark::NilPolicy policy;
996 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
997 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
1000 printf(
"[3] btDbvt::collideTT: ");
1002 for (
int i = 0; i < cfgBenchmark3_Iterations; ++i)
1007 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark3_Reference) * 100 / time);
1009 if (cfgBenchmark4_Enable)
1013 btDbvtBenchmark::NilPolicy policy;
1014 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1016 printf(
"[4] btDbvt::collideTT self: ");
1018 for (
int i = 0; i < cfgBenchmark4_Iterations; ++i)
1023 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark4_Reference) * 100 / time);
1025 if (cfgBenchmark5_Enable)
1030 btDbvtBenchmark::NilPolicy policy;
1031 transforms.
resize(cfgBenchmark5_Iterations);
1032 for (
int i = 0; i < transforms.
size(); ++i)
1034 transforms[i] = btDbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark5_OffsetScale);
1036 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[0]);
1037 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt[1]);
1040 printf(
"[5] btDbvt::collideTT xform: ");
1042 for (
int i = 0; i < cfgBenchmark5_Iterations; ++i)
1047 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark5_Reference) * 100 / time);
1049 if (cfgBenchmark6_Enable)
1054 btDbvtBenchmark::NilPolicy policy;
1055 transforms.
resize(cfgBenchmark6_Iterations);
1056 for (
int i = 0; i < transforms.
size(); ++i)
1058 transforms[i] = btDbvtBenchmark::RandTransform(cfgVolumeCenterScale * cfgBenchmark6_OffsetScale);
1060 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1062 printf(
"[6] btDbvt::collideTT xform,self: ");
1064 for (
int i = 0; i < cfgBenchmark6_Iterations; ++i)
1069 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark6_Reference) * 100 / time);
1071 if (cfgBenchmark7_Enable)
1077 btDbvtBenchmark::NilPolicy policy;
1078 rayorg.
resize(cfgBenchmark7_Iterations);
1079 raydir.
resize(cfgBenchmark7_Iterations);
1080 for (
int i = 0; i < rayorg.
size(); ++i)
1082 rayorg[i] = btDbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
1083 raydir[i] = btDbvtBenchmark::RandVector3(cfgVolumeCenterScale * 2);
1085 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1087 printf(
"[7] btDbvt::rayTest: ");
1089 for (
int i = 0; i < cfgBenchmark7_Passes; ++i)
1091 for (
int j = 0; j < cfgBenchmark7_Iterations; ++j)
1097 unsigned rays = cfgBenchmark7_Passes * cfgBenchmark7_Iterations;
1098 printf(
"%u ms (%i%%),(%u r/s)\r\n", time, (time - cfgBenchmark7_Reference) * 100 / time, (rays * 1000) / time);
1100 if (cfgBenchmark8_Enable)
1104 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1106 printf(
"[8] insert/remove: ");
1108 for (
int i = 0; i < cfgBenchmark8_Passes; ++i)
1110 for (
int j = 0; j < cfgBenchmark8_Iterations; ++j)
1112 dbvt.
remove(dbvt.
insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
1116 const int ir = cfgBenchmark8_Passes * cfgBenchmark8_Iterations;
1117 printf(
"%u ms (%i%%),(%u ir/s)\r\n", time, (time - cfgBenchmark8_Reference) * 100 / time, ir * 1000 / time);
1119 if (cfgBenchmark9_Enable)
1124 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1127 printf(
"[9] updates (teleport): ");
1129 for (
int i = 0; i < cfgBenchmark9_Passes; ++i)
1131 for (
int j = 0; j < cfgBenchmark9_Iterations; ++j)
1133 dbvt.
update(const_cast<btDbvtNode*>(leaves[rand() % cfgLeaves]),
1134 btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale));
1138 const int up = cfgBenchmark9_Passes * cfgBenchmark9_Iterations;
1139 printf(
"%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark9_Reference) * 100 / time, up * 1000 / time);
1141 if (cfgBenchmark10_Enable)
1147 vectors.
resize(cfgBenchmark10_Iterations);
1148 for (
int i = 0; i < vectors.
size(); ++i)
1150 vectors[i] = (btDbvtBenchmark::RandVector3() * 2 -
btVector3(1, 1, 1)) * cfgBenchmark10_Scale;
1152 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1155 printf(
"[10] updates (jitter): ");
1158 for (
int i = 0; i < cfgBenchmark10_Passes; ++i)
1160 for (
int j = 0; j < cfgBenchmark10_Iterations; ++j)
1163 btDbvtNode* l = const_cast<btDbvtNode*>(leaves[rand() % cfgLeaves]);
1169 const int up = cfgBenchmark10_Passes * cfgBenchmark10_Iterations;
1170 printf(
"%u ms (%i%%),(%u u/s)\r\n", time, (time - cfgBenchmark10_Reference) * 100 / time, up * 1000 / time);
1172 if (cfgBenchmark11_Enable)
1176 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1178 printf(
"[11] optimize (incremental): ");
1180 for (
int i = 0; i < cfgBenchmark11_Passes; ++i)
1185 const int op = cfgBenchmark11_Passes * cfgBenchmark11_Iterations;
1186 printf(
"%u ms (%i%%),(%u o/s)\r\n", time, (time - cfgBenchmark11_Reference) * 100 / time, op / time * 1000);
1188 if (cfgBenchmark12_Enable)
1193 volumes.
resize(cfgLeaves);
1194 results.
resize(cfgLeaves);
1195 for (
int i = 0; i < cfgLeaves; ++i)
1197 volumes[i] = btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
1199 printf(
"[12] btDbvtVolume notequal: ");
1201 for (
int i = 0; i < cfgBenchmark12_Iterations; ++i)
1203 for (
int j = 0; j < cfgLeaves; ++j)
1205 for (
int k = 0; k < cfgLeaves; ++k)
1207 results[k] =
NotEqual(volumes[j], volumes[k]);
1212 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark12_Reference) * 100 / time);
1214 if (cfgBenchmark13_Enable)
1219 btDbvtBenchmark::NilPolicy policy;
1220 vectors.
resize(cfgBenchmark13_Iterations);
1221 for (
int i = 0; i < vectors.
size(); ++i)
1223 vectors[i] = (btDbvtBenchmark::RandVector3() * 2 -
btVector3(1, 1, 1)).normalized();
1225 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1227 printf(
"[13] culling(OCL+fullsort): ");
1229 for (
int i = 0; i < cfgBenchmark13_Iterations; ++i)
1236 const int t = cfgBenchmark13_Iterations;
1237 printf(
"%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark13_Reference) * 100 / time, (t * 1000) / time);
1239 if (cfgBenchmark14_Enable)
1244 btDbvtBenchmark::P14 policy;
1245 vectors.
resize(cfgBenchmark14_Iterations);
1246 for (
int i = 0; i < vectors.
size(); ++i)
1248 vectors[i] = (btDbvtBenchmark::RandVector3() * 2 -
btVector3(1, 1, 1)).normalized();
1250 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1252 policy.m_nodes.reserve(cfgLeaves);
1253 printf(
"[14] culling(OCL+qsort): ");
1255 for (
int i = 0; i < cfgBenchmark14_Iterations; ++i)
1258 policy.m_nodes.resize(0);
1259 dbvt.
collideOCL(dbvt.
m_root, &vectors[i], &offset, vectors[i], 1, policy,
false);
1260 policy.m_nodes.quickSort(btDbvtBenchmark::P14::sortfnc);
1263 const int t = cfgBenchmark14_Iterations;
1264 printf(
"%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark14_Reference) * 100 / time, (t * 1000) / time);
1266 if (cfgBenchmark15_Enable)
1271 btDbvtBenchmark::P15 policy;
1272 vectors.
resize(cfgBenchmark15_Iterations);
1273 for (
int i = 0; i < vectors.
size(); ++i)
1275 vectors[i] = (btDbvtBenchmark::RandVector3() * 2 -
btVector3(1, 1, 1)).normalized();
1277 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1279 policy.m_nodes.reserve(cfgLeaves);
1280 printf(
"[15] culling(KDOP+qsort): ");
1282 for (
int i = 0; i < cfgBenchmark15_Iterations; ++i)
1285 policy.m_nodes.resize(0);
1286 policy.m_axis = vectors[i];
1288 policy.m_nodes.quickSort(btDbvtBenchmark::P15::sortfnc);
1291 const int t = cfgBenchmark15_Iterations;
1292 printf(
"%u ms (%i%%),(%u t/s)\r\n", time, (time - cfgBenchmark15_Reference) * 100 / time, (t * 1000) / time);
1294 if (cfgBenchmark16_Enable)
1299 btDbvtBenchmark::RandTree(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale, cfgLeaves, dbvt);
1301 batch.
reserve(cfgBenchmark16_BatchCount);
1302 printf(
"[16] insert/remove batch(%u): ", cfgBenchmark16_BatchCount);
1304 for (
int i = 0; i < cfgBenchmark16_Passes; ++i)
1306 for (
int j = 0; j < cfgBenchmark16_BatchCount; ++j)
1308 batch.
push_back(dbvt.
insert(btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale), 0));
1310 for (
int j = 0; j < cfgBenchmark16_BatchCount; ++j)
1317 const int ir = cfgBenchmark16_Passes * cfgBenchmark16_BatchCount;
1318 printf(
"%u ms (%i%%),(%u bir/s)\r\n", time, (time - cfgBenchmark16_Reference) * 100 / time,
int(ir * 1000.0 / time));
1320 if (cfgBenchmark17_Enable)
1326 volumes.
resize(cfgLeaves);
1327 results.
resize(cfgLeaves);
1328 indices.
resize(cfgLeaves);
1329 for (
int i = 0; i < cfgLeaves; ++i)
1332 volumes[i] = btDbvtBenchmark::RandVolume(cfgVolumeCenterScale, cfgVolumeExentsBase, cfgVolumeExentsScale);
1334 for (
int i = 0; i < cfgLeaves; ++i)
1336 btSwap(indices[i], indices[rand() % cfgLeaves]);
1338 printf(
"[17] btDbvtVolume select: ");
1340 for (
int i = 0; i < cfgBenchmark17_Iterations; ++i)
1342 for (
int j = 0; j < cfgLeaves; ++j)
1344 for (
int k = 0; k < cfgLeaves; ++k)
1346 const int idx = indices[k];
1347 results[idx] =
Select(volumes[idx], volumes[j], volumes[k]);
1352 printf(
"%u ms (%i%%)\r\n", time, (time - cfgBenchmark17_Reference) * 100 / time);