1
0

Added simple deadlock detection code.

This will assert and then deliberately crash the server once a deadlock is detected. For detection, only the world tick threads are considered: cWorld's m_WorldAge is checked periodically, and if it doesn't increment for several seconds, a deadlock is reported.
This commit is contained in:
madmaxoft 2013-08-14 22:39:12 +02:00
parent f93d13c419
commit 50205bc4df
4 changed files with 246 additions and 4 deletions

View File

@ -390,6 +390,14 @@
RelativePath="..\source\Cuboid.h"
>
</File>
<File
RelativePath="..\source\DeadlockDetect.cpp"
>
</File>
<File
RelativePath="..\source\DeadlockDetect.h"
>
</File>
<File
RelativePath="..\source\Defines.h"
>

155
source/DeadlockDetect.cpp Normal file
View File

@ -0,0 +1,155 @@
// DeadlockDetect.cpp
// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
#include "Globals.h"
#include "DeadlockDetect.h"
#include "Root.h"
#include "World.h"
/// Number of milliseconds per cycle; used as the timeout of the wait between two consecutive world-age checks
const int CYCLE_MILLISECONDS = 500;
/// When the number of consecutive cycles with the same world age exceeds this value, it is considered a deadlock
const int NUM_CYCLES_LIMIT = 40; // 40 cycles of 500 ms each = twenty seconds
/// Creates the detector; the thread itself is not launched until Start() is called.
/// The string handed to the base class is presumably the thread's name - confirm against cIsThread.
cDeadlockDetect::cDeadlockDetect(void) : super("DeadlockDetect")
{
	// Nothing else to initialize here; the world ages are filled in by Start()
}
bool cDeadlockDetect::Start(void)
{
// Read the initial world data:
class cFillIn :
public cWorldListCallback
{
public:
cFillIn(cDeadlockDetect * a_Detect) :
m_Detect(a_Detect)
{
}
virtual bool Item(cWorld * a_World) override
{
m_Detect->SetWorldAge(a_World->GetName(), a_World->GetWorldAge());
return false;
}
protected:
cDeadlockDetect * m_Detect;
} FillIn(this);
cRoot::Get()->ForEachWorld(FillIn);
return super::Start();
}
/// Stops the detection: signals the termination event that Execute() waits on, then stops the thread via cIsThread's Stop().
void cDeadlockDetect::Stop(void)
{
// Signal first: Execute() keeps looping only while its wait on this event times out
m_EvtTerminate.Set();
// NOTE(review): presumably waits for the thread to finish - confirm against cIsThread::Stop()
super::Stop();
}
void cDeadlockDetect::Execute(void)
{
// Loop until the event is signalled
while (m_EvtTerminate.Wait(CYCLE_MILLISECONDS) == cEvent::wrTimeout)
{
// Check the world ages:
class cChecker :
public cWorldListCallback
{
public:
cChecker(cDeadlockDetect * a_Detect) :
m_Detect(a_Detect)
{
}
protected:
cDeadlockDetect * m_Detect;
virtual bool Item(cWorld * a_World) override
{
m_Detect->CheckWorldAge(a_World->GetName(), a_World->GetWorldAge());
return false;
}
} Checker(this);
cRoot::Get()->ForEachWorld(Checker);
} // while (should run)
}
void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, Int64 a_Age)
{
m_WorldAges[a_WorldName].m_Age = a_Age;
m_WorldAges[a_WorldName].m_NumCyclesSame = 0;
}
void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, Int64 a_Age)
{
WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
if (itr == m_WorldAges.end())
{
ASSERT(!"Unknown world in cDeadlockDetect");
return;
}
if (itr->second.m_Age == a_Age)
{
itr->second.m_NumCyclesSame += 1;
if (itr->second.m_NumCyclesSame > NUM_CYCLES_LIMIT)
{
DeadlockDetected();
return;
}
}
else
{
itr->second.m_Age = a_Age;
itr->second.m_NumCyclesSame = 0;
}
}
/// Called when a deadlock is detected. Asserts and then crashes the server on purpose.
void cDeadlockDetect::DeadlockDetected(void)
{
	ASSERT(!"Deadlock detected");

	// TODO: Make a crashdump / coredump

	// Crash the server intentionally.
	// The store goes through a pointer-to-volatile: a write through a plain null pointer is undefined behaviour
	// that optimizing compilers are allowed to delete, which would let the server silently keep running.
	volatile int * CrashPtr = 0;
	*CrashPtr = 0;
}

70
source/DeadlockDetect.h Normal file
View File

@ -0,0 +1,70 @@
// DeadlockDetect.h
// Declares the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
/*
This class simply monitors each world's m_WorldAge, which is expected to grow on each tick.
If the world age doesn't grow for several seconds, it's either because the server is super-overloaded,
or because the world tick thread hangs in a deadlock. We presume the latter and therefore kill the server.
Once we learn to write crashdumps programmatically, we should do so just before killing, to enable debugging.
*/
#pragma once
#include "OSSupport/IsThread.h"
/// Watchdog thread that periodically compares each world's age with the previously seen value
/// and deliberately crashes the server when a world's age stops changing for too many cycles.
class cDeadlockDetect :
public cIsThread
{
typedef cIsThread super;
public:
/// Constructs the detector; detection does not begin until Start() is called
cDeadlockDetect(void);
/// Starts the detection. Hides cIsThread's Start, because we need some initialization:
/// each world's current age is recorded first, then cIsThread's Start is called and its result returned
bool Start(void);
/// Stops the detection. Hides cIsThread's Stop, because we need to signal m_EvtTerminate
/// before calling cIsThread's Stop
void Stop(void);
protected:
/// Per-world bookkeeping used to tell whether the world's age is still advancing
struct sWorldAge
{
/// Last m_WorldAge that has been detected in this world
Int64 m_Age;
/// Number of consecutive cycles for which the age has been the same
int m_NumCyclesSame;
} ;
/// Maps world name -> sWorldAge
typedef std::map<AString, sWorldAge> WorldAges;
/// Last seen age and unchanged-cycle count of each tracked world; filled in by Start(), updated by CheckWorldAge()
WorldAges m_WorldAges;
/// Set by Stop(); Execute() waits on it with a timeout and ends its loop when the wait does not time out
cEvent m_EvtTerminate;
// cIsThread overrides:
/// Thread body: checks all worlds' ages once per cycle until m_EvtTerminate is signalled
virtual void Execute(void) override;
/// Sets the initial world age and resets the world's unchanged-cycle count
void SetWorldAge(const AString & a_WorldName, Int64 a_Age);
/// Checks if the world's age has changed, updates the world's stats; calls DeadlockDetected() if deadlock detected
void CheckWorldAge(const AString & a_WorldName, Int64 a_Age);
/// Called when a deadlock is detected. Asserts and then crashes the server intentionally.
void DeadlockDetected(void);
} ;

View File

@ -16,6 +16,7 @@
#include "Chunk.h"
#include "Protocol/ProtocolRecognizer.h" // for protocol version constants
#include "CommandOutput.h"
#include "DeadlockDetect.h"
#include "../iniFile/iniFile.h"
@ -90,6 +91,7 @@ void cRoot::InputThread(void * a_Params)
void cRoot::Start(void)
{
cDeadlockDetect dd;
delete m_Log;
m_Log = new cMCLogger();
@ -162,6 +164,9 @@ void cRoot::Start(void)
LOG("Starting worlds...");
StartWorlds();
LOG("Starting deadlock detector...");
dd.Start();
LOG("Starting server...");
m_Server->Start();
@ -183,17 +188,21 @@ void cRoot::Start(void)
// Deallocate stuffs
LOG("Shutting down server...");
m_Server->Shutdown(); // This waits for threads to stop and d/c clients
m_Server->Shutdown();
LOG("Shutting down deadlock detector...");
dd.Stop();
LOG("Stopping world threads...");
StopWorlds();
LOG("Stopping authenticator...");
m_Authenticator.Stop();
LOG("Freeing MonsterConfig...");
delete m_MonsterConfig; m_MonsterConfig = 0;
delete m_MonsterConfig; m_MonsterConfig = NULL;
LOG("Stopping WebAdmin...");
delete m_WebAdmin; m_WebAdmin = 0;
delete m_WebAdmin; m_WebAdmin = NULL;
LOG("Unloading recipes...");
delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
delete m_CraftingRecipes; m_CraftingRecipes = NULL;