2013-08-14 16:39:12 -04:00
|
|
|
|
|
|
|
// DeadlockDetect.cpp
|
|
|
|
|
|
|
|
// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
|
|
|
|
|
|
|
|
#include "Globals.h"
|
|
|
|
#include "DeadlockDetect.h"
|
|
|
|
#include "Root.h"
|
|
|
|
#include "World.h"
|
2014-03-25 18:35:50 -04:00
|
|
|
#include <cstdlib>
|
2013-08-14 16:39:12 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2014-10-20 13:59:40 -04:00
|
|
|
/** Length of a single detector cycle, in milliseconds.
The detector thread sleeps for this long between two consecutive world-age checks. */
static constexpr int CYCLE_MILLISECONDS = 100;
|
2013-08-14 16:39:12 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cDeadlockDetect::cDeadlockDetect(void) :
|
2021-03-28 08:34:57 -04:00
|
|
|
Super("Deadlock Detector"),
|
2014-08-21 16:39:53 -04:00
|
|
|
m_IntervalSec(1000)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-01-17 16:38:04 -05:00
|
|
|
/** Warns about every critical section that is still registered for tracking at destruction time. */
cDeadlockDetect::~cDeadlockDetect()
{
	cCSLock lock(m_CS);
	if (m_TrackedCriticalSections.empty())
	{
		// Every tracked CS has been properly untracked, nothing to report
		return;
	}

	// Some CSs were never untracked; log how many, then each one by address and name:
	const auto numLeftover = static_cast<unsigned>(m_TrackedCriticalSections.size());
	LOGWARNING("DeadlockDetect: Some CS objects (%u) haven't been removed from tracking", numLeftover);
	for (const auto & leftover: m_TrackedCriticalSections)
	{
		LOGWARNING(" CS %p / %s", static_cast<void *>(leftover.first), leftover.second.c_str());
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-03-28 17:33:24 -04:00
|
|
|
void cDeadlockDetect::Start(int a_IntervalSec)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
2013-11-30 16:14:47 -05:00
|
|
|
m_IntervalSec = a_IntervalSec;
|
2016-02-05 16:45:45 -05:00
|
|
|
|
2013-08-14 16:39:12 -04:00
|
|
|
// Read the initial world data:
|
2017-09-11 17:20:49 -04:00
|
|
|
cRoot::Get()->ForEachWorld([=](cWorld & a_World)
|
2021-03-28 17:33:24 -04:00
|
|
|
{
|
|
|
|
SetWorldAge(a_World.GetName(), a_World.GetWorldAge());
|
|
|
|
return false;
|
|
|
|
});
|
|
|
|
|
|
|
|
Super::Start();
|
2013-08-14 16:39:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-01-17 16:38:04 -05:00
|
|
|
/** Registers a critical section for tracking, so that it gets listed when a deadlock is detected.
@param a_CS    The critical section to track; only its address is stored.
@param a_Name  The name under which the critical section is reported in the logs. */
void cDeadlockDetect::TrackCriticalSection(cCriticalSection & a_CS, const AString & a_Name)
{
	cCriticalSection * const trackedCS = &a_CS;

	// The tracking list is guarded by m_CS:
	cCSLock lock(m_CS);
	m_TrackedCriticalSections.emplace_back(trackedCS, a_Name);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Removes a critical section from tracking.
Only the first entry whose stored address matches a_CS is removed; if there is no such entry, nothing happens.
@param a_CS  The critical section to stop tracking. */
void cDeadlockDetect::UntrackCriticalSection(cCriticalSection & a_CS)
{
	cCSLock lock(m_CS);

	// Walk the list until the entry for a_CS is found or the list is exhausted:
	auto & tracked = m_TrackedCriticalSections;
	auto pos = tracked.begin();
	while ((pos != tracked.end()) && (pos->first != &a_CS))
	{
		++pos;
	}

	if (pos != tracked.end())
	{
		tracked.erase(pos);
	}
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2013-08-14 16:39:12 -04:00
|
|
|
void cDeadlockDetect::Execute(void)
|
|
|
|
{
|
2013-08-19 16:48:13 -04:00
|
|
|
// Loop until the signal to terminate:
|
2013-08-19 16:35:27 -04:00
|
|
|
while (!m_ShouldTerminate)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
|
|
|
// Check the world ages:
|
2017-09-11 17:20:49 -04:00
|
|
|
cRoot::Get()->ForEachWorld([=](cWorld & a_World)
|
2021-04-04 20:38:43 -04:00
|
|
|
{
|
|
|
|
CheckWorldAge(a_World.GetName(), a_World.GetWorldAge());
|
|
|
|
return false;
|
|
|
|
});
|
2016-02-05 16:45:45 -05:00
|
|
|
|
2014-10-20 13:59:40 -04:00
|
|
|
std::this_thread::sleep_for(std::chrono::milliseconds(CYCLE_MILLISECONDS));
|
2013-08-14 16:39:12 -04:00
|
|
|
} // while (should run)
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-04-04 20:38:43 -04:00
|
|
|
void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, const cTickTimeLong a_Age)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
|
|
|
m_WorldAges[a_WorldName].m_Age = a_Age;
|
|
|
|
m_WorldAges[a_WorldName].m_NumCyclesSame = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-04-04 20:38:43 -04:00
|
|
|
void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, const cTickTimeLong a_Age)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
|
|
|
WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
|
|
|
|
if (itr == m_WorldAges.end())
|
|
|
|
{
|
2014-05-05 11:06:45 -04:00
|
|
|
SetWorldAge(a_WorldName, a_Age);
|
2013-08-14 16:39:12 -04:00
|
|
|
return;
|
|
|
|
}
|
2014-04-18 15:09:44 -04:00
|
|
|
|
|
|
|
cDeadlockDetect::sWorldAge & WorldAge = itr->second;
|
|
|
|
|
|
|
|
if (WorldAge.m_Age == a_Age)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
2014-04-18 15:09:44 -04:00
|
|
|
WorldAge.m_NumCyclesSame += 1;
|
2014-10-20 13:59:40 -04:00
|
|
|
if (WorldAge.m_NumCyclesSame > (m_IntervalSec * 1000) / CYCLE_MILLISECONDS)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
2017-01-17 08:10:02 -05:00
|
|
|
DeadlockDetected(a_WorldName, a_Age);
|
2013-08-14 16:39:12 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2014-04-18 15:09:44 -04:00
|
|
|
WorldAge.m_Age = a_Age;
|
|
|
|
WorldAge.m_NumCyclesSame = 0;
|
2013-08-14 16:39:12 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-04-04 20:38:43 -04:00
|
|
|
void cDeadlockDetect::DeadlockDetected(const AString & a_WorldName, const cTickTimeLong a_WorldAge)
|
2013-08-14 16:39:12 -04:00
|
|
|
{
|
2017-01-17 08:10:02 -05:00
|
|
|
LOGERROR("Deadlock detected: world %s has been stuck at age %lld. Aborting the server.",
|
2021-04-04 20:38:43 -04:00
|
|
|
a_WorldName.c_str(), static_cast<long long>(a_WorldAge.count())
|
2017-01-17 08:10:02 -05:00
|
|
|
);
|
2017-01-17 16:38:04 -05:00
|
|
|
ListTrackedCSs();
|
2013-08-14 16:39:12 -04:00
|
|
|
ASSERT(!"Deadlock detected");
|
2021-04-04 20:38:43 -04:00
|
|
|
std::abort();
|
2013-08-14 16:39:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2017-01-17 16:38:04 -05:00
|
|
|
|
|
|
|
void cDeadlockDetect::ListTrackedCSs(void)
|
|
|
|
{
|
|
|
|
cCSLock lock(m_CS);
|
|
|
|
for (const auto & cs: m_TrackedCriticalSections)
|
|
|
|
{
|
|
|
|
LOG("CS at %p, %s: RecursionCount = %d, ThreadIDHash = %04llx",
|
|
|
|
static_cast<void *>(cs.first), cs.second.c_str(),
|
|
|
|
cs.first->m_RecursionCount, static_cast<UInt64>(std::hash<std::thread::id>()(cs.first->m_OwningThreadID))
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|