#if BT_USE_OPENMP && BT_THREADSAFE
// OpenMP-based scheduler
#include <omp.h>
#endif  // #if BT_USE_OPENMP && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
// Microsoft Parallel Patterns Library scheduler
#include <ppl.h>
#include <concrtrm.h>  // for GetProcessorCount()
#endif  // #if BT_USE_PPL && BT_THREADSAFE

#if BT_USE_TBB && BT_THREADSAFE
// Intel Threading Building Blocks scheduler
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif  // #if BT_USE_TBB && BT_THREADSAFE

#if BT_THREADSAFE

//
// btSpinMutex -- lightweight spin-mutex implemented with atomic ops, never puts a thread to sleep.
// Pick an atomics implementation based on compiler support.
//
#if __cplusplus >= 201103L
// anything claiming full C++11 compliance can use C++11 atomics
// (on GCC or Clang, compile with -std=c++11)
#define USE_CPP11_ATOMICS 1
#elif defined(_MSC_VER)
// on MSVC, use intrinsics instead
#define USE_MSVC_INTRINSICS 1
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
// GCC atomic builtins, available since GCC 4.7
#define USE_GCC_BUILTIN_ATOMICS 1
#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
// older __sync builtins, available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1
#endif

#if USE_CPP11_ATOMICS

#include <atomic>

#define THREAD_LOCAL_STATIC thread_local static

bool btSpinMutex::tryLock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    int expected = 0;
    // take the lock if it is free (0 -> 1); acq_rel on success, acquire on failure
    return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire);
}

// ... (btSpinMutex::lock() spins on tryLock(); this lock never sleeps the thread)

void btSpinMutex::unlock()
{
    std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
    // release store makes writes made under the lock visible to the next locker
    std::atomic_store_explicit(aDest, int(0), std::memory_order_release);
}
#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec(thread) static

bool btSpinMutex::tryLock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    return (0 == _InterlockedCompareExchange(aDest, 1, 0));
}

// ... (btSpinMutex::lock() spins on tryLock() as in the C++11 branch)

void btSpinMutex::unlock()
{
    volatile long* aDest = reinterpret_cast<long*>(&mLock);
    _InterlockedExchange(aDest, 0);
}
#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    int expected = 0;
    const bool weak = false;
    const int memOrderSuccess = __ATOMIC_ACQ_REL;
    const int memOrderFail = __ATOMIC_ACQUIRE;
    return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}

// ... (btSpinMutex::lock() spins on tryLock() as in the C++11 branch)

void btSpinMutex::unlock()
{
    __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}
#elif USE_GCC_BUILTIN_ATOMICS_OLD

#define THREAD_LOCAL_STATIC static __thread

bool btSpinMutex::tryLock()
{
    return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}

// ... (btSpinMutex::lock() spins on tryLock() as in the C++11 branch)

void btSpinMutex::unlock()
{
    // clears mLock to 0 with a full barrier
    __sync_fetch_and_and(&mLock, int(0));
}
#else  //#elif USE_MSVC_INTRINSICS

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_MSVC_INTRINSICS

#else  //#if BT_THREADSAFE

// stub implementations; these should never be called in a non-threadsafe build
void btSpinMutex::lock()
{
    btAssert(!"unimplemented btSpinMutex::lock() called");
}

void btSpinMutex::unlock()
{
    btAssert(!"unimplemented btSpinMutex::unlock() called");
}

bool btSpinMutex::tryLock()
{
    btAssert(!"unimplemented btSpinMutex::tryLock() called");
    return true;
}

#define THREAD_LOCAL_STATIC static

#endif  // #else //#if BT_THREADSAFE

// hands out a unique index per thread under the spin-mutex
struct ThreadsafeCounter
{
    unsigned int mCounter;
    btSpinMutex mMutex;

    unsigned int getNext()
    {
        // no need for atomics here, this is only called once per thread
        mMutex.lock();
        mCounter++;
        if (mCounter >= BT_MAX_THREAD_COUNT)
        {
            btAssert(!"thread counter exceeded");
            // wrap back to the first worker index
            mCounter = 1;
        }
        unsigned int val = mCounter;
        mMutex.unlock();
        return val;
    }
};

static btITaskScheduler* gBtTaskScheduler = 0;
static int gThreadsRunningCounter = 0;  // useful for detecting nested parallel-for calls
static btSpinMutex gThreadsRunningCounterMutex;
static ThreadsafeCounter gThreadCounter;
//
// BT_DETECT_BAD_THREAD_INDEX tries to detect when two or more threads end up with the same thread index
//
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[BT_MAX_THREAD_COUNT];

static ThreadId_t getDebugThreadId()
{
    return GetCurrentThreadId();
}

#endif  // #if BT_DETECT_BAD_THREAD_INDEX

// return a unique index per thread, main thread is 0, workers are in [1, BT_MAX_THREAD_COUNT)
unsigned int btGetCurrentThreadIndex()
{
    const unsigned int kNullIndex = ~0U;
    THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
    if (sThreadIndex == kNullIndex)
    {
        // first call on this thread, grab the next index
        sThreadIndex = gThreadCounter.getNext();
    }
#if BT_DETECT_BAD_THREAD_INDEX
    if (gBtTaskScheduler && sThreadIndex > 0)
    {
        ThreadId_t tid = getDebugThreadId();
        if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId)
        {
            // not claimed yet, claim this index for the current thread
            gDebugThreadIds[sThreadIndex] = tid;
        }
        else if (gDebugThreadIds[sThreadIndex] != tid)
        {
            // the task scheduler is breaking our assumptions about how threads are managed
            btAssert(!"there are 2 or more threads with the same thread-index!");
        }
    }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX
    return sThreadIndex;
}
// ... (btResetThreadIndexCounter, the btITaskScheduler constructor and activate/deactivate,
//      and btPushThreadsAreRunning/btPopThreadsAreRunning/btThreadsAreRunning elided in this listing)

void btSetTaskScheduler(btITaskScheduler* ts)
{
    // also ensures the main thread claims index 0 before any worker runs
    int threadId = btGetCurrentThreadIndex();
    if (threadId != 0)
    {
        btAssert(!"btSetTaskScheduler must be called from the main thread!");
        return;
    }
    if (gBtTaskScheduler)
    {
        // deactivate the old task scheduler
        gBtTaskScheduler->deactivate();
    }
    gBtTaskScheduler = ts;
    if (ts)
    {
        // activate the new task scheduler
        ts->activate();
    }
}

btITaskScheduler* btGetTaskScheduler()
{
    return gBtTaskScheduler;
}
void btParallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body)
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if (!btThreadsAreRunning())
    {
        // clear out the recorded thread ids before workers start up again
        for (unsigned int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
        {
            gDebugThreadIds[i] = kInvalidThreadId;
        }
    }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert(gBtTaskScheduler != NULL);  // call btSetTaskScheduler() with a valid task scheduler first!
    gBtTaskScheduler->parallelFor(iBegin, iEnd, grainSize, body);

#else  // #if BT_THREADSAFE

    // single-threaded fallback
    btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
    body.forLoop(iBegin, iEnd);

#endif  // #if BT_THREADSAFE
}

btScalar btParallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body)
{
#if BT_THREADSAFE

#if BT_DETECT_BAD_THREAD_INDEX
    if (!btThreadsAreRunning())
    {
        // clear out the recorded thread ids before workers start up again
        for (unsigned int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
        {
            gDebugThreadIds[i] = kInvalidThreadId;
        }
    }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX

    btAssert(gBtTaskScheduler != NULL);  // call btSetTaskScheduler() with a valid task scheduler first!
    return gBtTaskScheduler->parallelSum(iBegin, iEnd, grainSize, body);

#else  // #if BT_THREADSAFE

    // single-threaded fallback
    btAssert(!"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE");
    return body.sumLoop(iBegin, iEnd);

#endif  //#else // #if BT_THREADSAFE
}

///
/// btTaskSchedulerSequential -- non-threaded implementation of task scheduler
/// (really just useful for testing performance of single threaded vs multi)
///
class btTaskSchedulerSequential : public btITaskScheduler
{
public:
    btTaskSchedulerSequential() : btITaskScheduler("Sequential") {}
    virtual int getMaxNumThreads() const BT_OVERRIDE { return 1; }
    virtual int getNumThreads() const BT_OVERRIDE { return 1; }
    virtual void setNumThreads(int numThreads) BT_OVERRIDE {}
    virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
    {
        body.forLoop(iBegin, iEnd);
    }
    virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
    {
        return body.sumLoop(iBegin, iEnd);
    }
};
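// --- Usage sketch (not part of the original source): the minimum needed to run work
// through btParallelFor. "ScaleJob" and its members are hypothetical; the interface
// (btIParallelForBody::forLoop) and the entry points are from this file and btThreads.h.
struct ScaleJob : public btIParallelForBody
{
    btScalar* mValues;  // hypothetical data array
    btScalar mFactor;

    virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        // called once per grainSize-sized chunk, possibly on a worker thread
        for (int i = iBegin; i < iEnd; ++i)
        {
            mValues[i] *= mFactor;
        }
    }
};
// once, from the main thread:
//   btSetTaskScheduler(btGetSequentialTaskScheduler());  // or an OpenMP/TBB/PPL scheduler
// then per frame:
//   ScaleJob job; /* fill in mValues, mFactor */
//   btParallelFor(0, count, 64, job);  // chunks of up to 64 indices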
#if BT_USE_OPENMP && BT_THREADSAFE
///
/// btTaskSchedulerOpenMP -- wrapper around the OpenMP runtime
///
class btTaskSchedulerOpenMP : public btITaskScheduler
{
    int m_numThreads;

public:
    btTaskSchedulerOpenMP() : btITaskScheduler("OpenMP") { m_numThreads = 0; }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return omp_get_max_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE { return m_numThreads; }
    virtual void setNumThreads(int numThreads) BT_OVERRIDE
    {
        m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads));
        // drop to a single thread first so the old worker threads get destroyed
        omp_set_num_threads(1);
        omp_set_num_threads(m_numThreads);
        m_savedThreadCounter = 0;
        // ...
    }
    virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
    {
#pragma omp parallel for schedule(static, 1)
        for (int i = iBegin; i < iEnd; i += grainSize)
        {
            body.forLoop(i, (std::min)(i + grainSize, iEnd));
        }
    }
    virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
    {
        btScalar sum = btScalar(0);
#pragma omp parallel for schedule(static, 1) reduction(+ : sum)
        for (int i = iBegin; i < iEnd; i += grainSize)
        {
            sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd));
        }
        return sum;
    }
};
#endif  // #if BT_USE_OPENMP && BT_THREADSAFE
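// --- Usage sketch (not part of the original source): selecting the OpenMP-backed
// scheduler when the build provides it; btGetOpenMPTaskScheduler() returns NULL otherwise.
// if (btITaskScheduler* sched = btGetOpenMPTaskScheduler())
// {
//     sched->setNumThreads(sched->getMaxNumThreads());
//     btSetTaskScheduler(sched);
// }
// Note how each scheduler steps the loop by grainSize: e.g. iBegin=0, iEnd=100,
// grainSize=32 gives body calls for [0,32) [32,64) [64,96) [96,100).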
#if BT_USE_TBB && BT_THREADSAFE
///
/// btTaskSchedulerTBB -- wrapper around the Intel Threading Building Blocks task scheduler
///
class btTaskSchedulerTBB : public btITaskScheduler
{
    int m_numThreads;
    tbb::task_scheduler_init* m_tbbSchedulerInit;

public:
    btTaskSchedulerTBB() : btITaskScheduler("IntelTBB")
    {
        m_numThreads = 0;
        m_tbbSchedulerInit = NULL;
    }
    ~btTaskSchedulerTBB()
    {
        if (m_tbbSchedulerInit)
        {
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
    }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return tbb::task_scheduler_init::default_num_threads();
    }
    virtual int getNumThreads() const BT_OVERRIDE { return m_numThreads; }
    virtual void setNumThreads(int numThreads) BT_OVERRIDE
    {
        m_numThreads = (std::max)(1, (std::min)(int(BT_MAX_THREAD_COUNT), numThreads));
        if (m_tbbSchedulerInit)
        {
            // deleting the task_scheduler_init destroys all previous worker threads
            delete m_tbbSchedulerInit;
            m_tbbSchedulerInit = NULL;
        }
        m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads);
        m_savedThreadCounter = 0;
        // ...
    }
    struct ForBodyAdapter
    {
        const btIParallelForBody* mBody;

        ForBodyAdapter(const btIParallelForBody* body) : mBody(body) {}
        void operator()(const tbb::blocked_range<int>& range) const
        {
            mBody->forLoop(range.begin(), range.end());
        }
    };
    virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
    {
        ForBodyAdapter tbbBody(&body);
        // simple_partitioner keeps chunks at the requested grainSize
        tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize),
                          tbbBody,
                          tbb::simple_partitioner());
    }
    struct SumBodyAdapter
    {
        const btIParallelSumBody* mBody;
        btScalar mSum;

        SumBodyAdapter(const btIParallelSumBody* body) : mBody(body), mSum(btScalar(0)) {}
        // splitting constructor and join() let TBB fork and merge partial sums
        SumBodyAdapter(const SumBodyAdapter& src, tbb::split) : mBody(src.mBody), mSum(btScalar(0)) {}
        void join(const SumBodyAdapter& src) { mSum += src.mSum; }
        void operator()(const tbb::blocked_range<int>& range)
        {
            mSum += mBody->sumLoop(range.begin(), range.end());
        }
    };
    virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
    {
        SumBodyAdapter tbbBody(&body);
        // deterministic reduce gives the same result for the same input every run
        tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody);
        return tbbBody.mSum;
    }
};
#endif  // #if BT_USE_TBB && BT_THREADSAFE
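// --- Usage sketch (not part of the original source): a minimal btIParallelSumBody.
// "ArraySumBody" and its member are hypothetical; sumLoop() is the interface from
// btThreads.h. Whatever scheduler is active combines the per-chunk results
// (join() for TBB, reduction(+) for OpenMP, combinable<> for PPL).
struct ArraySumBody : public btIParallelSumBody
{
    const btScalar* mValues;  // hypothetical input array

    virtual btScalar sumLoop(int iBegin, int iEnd) const BT_OVERRIDE
    {
        btScalar s = btScalar(0);
        for (int i = iBegin; i < iEnd; ++i)
        {
            s += mValues[i];
        }
        return s;
    }
};
// ArraySumBody body; /* fill in mValues */
// btScalar total = btParallelSum(0, count, 128, body);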
#if BT_USE_PPL && BT_THREADSAFE
///
/// btTaskSchedulerPPL -- wrapper around the Microsoft Parallel Patterns Library task scheduler
///
class btTaskSchedulerPPL : public btITaskScheduler
{
    int m_numThreads;
    concurrency::combinable<btScalar> m_sum;  // for parallelSum

public:
    btTaskSchedulerPPL() : btITaskScheduler("PPL") { m_numThreads = 0; }
    virtual int getMaxNumThreads() const BT_OVERRIDE
    {
        return concurrency::GetProcessorCount();
    }
    virtual int getNumThreads() const BT_OVERRIDE { return m_numThreads; }
    virtual void setNumThreads(int numThreads) BT_OVERRIDE
    {
        // cap the thread count for PPL due to a thread-index issue
        const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
        m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads));
        using namespace concurrency;
        if (CurrentScheduler::Id() != -1)
        {
            // detach the current scheduler so it can be replaced
            CurrentScheduler::Detach();
        }
        SchedulerPolicy policy;
        {
            // PPL seems to keep old threads around when the pool shrinks;
            // create-and-detach a one-thread scheduler to force them to be destroyed
            policy.SetConcurrencyLimits(1, 1);
            CurrentScheduler::Create(policy);
            CurrentScheduler::Detach();
        }
        policy.SetConcurrencyLimits(m_numThreads, m_numThreads);
        CurrentScheduler::Create(policy);
        m_savedThreadCounter = 0;
        // ...
    }
    struct ForBodyAdapter
    {
        const btIParallelForBody* mBody;
        int mGrainSize;
        int mIndexEnd;

        ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {}
        void operator()(int i) const
        {
            // i advances in grainSize steps; clamp the last chunk to the end of the range
            mBody->forLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
        }
    };
    virtual void parallelFor(int iBegin, int iEnd, int grainSize, const btIParallelForBody& body) BT_OVERRIDE
    {
        ForBodyAdapter pplBody(&body, grainSize, iEnd);
        concurrency::parallel_for(iBegin,
                                  iEnd,
                                  grainSize,
                                  pplBody);
    }
    struct SumBodyAdapter
    {
        const btIParallelSumBody* mBody;
        concurrency::combinable<btScalar>* mSum;
        int mGrainSize;
        int mIndexEnd;

        SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {}
        void operator()(int i) const
        {
            // accumulate into a per-thread slot to avoid contention
            mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
        }
    };
    static btScalar sumFunc(btScalar a, btScalar b) { return a + b; }
    virtual btScalar parallelSum(int iBegin, int iEnd, int grainSize, const btIParallelSumBody& body) BT_OVERRIDE
    {
        m_sum.clear();
        SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd);
        concurrency::parallel_for(iBegin,
                                  iEnd,
                                  grainSize,
                                  pplBody);
        // fold the per-thread partial sums into a single result
        return m_sum.combine(sumFunc);
    }
};
#endif  // #if BT_USE_PPL && BT_THREADSAFE
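// --- Clarifying sketch (not part of the original source): the combinable<> reduction
// pattern used above, standalone. local() returns this thread's accumulator; combine()
// folds all of them after the parallel loop. "values" and "n" are hypothetical.
// concurrency::combinable<btScalar> partial;
// concurrency::parallel_for(0, n, [&](int i) { partial.local() += values[i]; });
// btScalar total = partial.combine([](btScalar a, btScalar b) { return a + b; });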
// create a non-threaded task scheduler (always available)
btITaskScheduler* btGetSequentialTaskScheduler()
{
    static btTaskSchedulerSequential sTaskScheduler;
    return &sTaskScheduler;
}

// create an OpenMP task scheduler (if available, otherwise returns NULL)
btITaskScheduler* btGetOpenMPTaskScheduler()
{
#if BT_USE_OPENMP && BT_THREADSAFE
    static btTaskSchedulerOpenMP sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// create an Intel TBB task scheduler (if available, otherwise returns NULL)
btITaskScheduler* btGetTBBTaskScheduler()
{
#if BT_USE_TBB && BT_THREADSAFE
    static btTaskSchedulerTBB sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}

// create a PPL task scheduler (if available, otherwise returns NULL)
btITaskScheduler* btGetPPLTaskScheduler()
{
#if BT_USE_PPL && BT_THREADSAFE
    static btTaskSchedulerPPL sTaskScheduler;
    return &sTaskScheduler;
#else
    return NULL;
#endif
}
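// --- Usage sketch (not part of the original source): pick the best scheduler this
// build provides, falling back to the sequential one; "pickTaskScheduler" is a
// hypothetical helper name.
btITaskScheduler* pickTaskScheduler()
{
    btITaskScheduler* sched = btGetTBBTaskScheduler();
    if (!sched) sched = btGetPPLTaskScheduler();
    if (!sched) sched = btGetOpenMPTaskScheduler();
    if (!sched) sched = btGetSequentialTaskScheduler();  // never NULL
    sched->setNumThreads(sched->getMaxNumThreads());
    return sched;
}
// btSetTaskScheduler(pickTaskScheduler());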