Added simple deadlock detection code.
This will assert and then deliberately crash the server once a deadlock is detected. For detection, only the world tick threads are considered: cWorld's m_WorldAge is checked periodically, and if it doesn't increment for several seconds, a deadlock is reported.
This commit is contained in:
parent
f93d13c419
commit
50205bc4df
@ -390,6 +390,14 @@
|
||||
RelativePath="..\source\Cuboid.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\source\DeadlockDetect.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\source\DeadlockDetect.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\source\Defines.h"
|
||||
>
|
||||
|
155
source/DeadlockDetect.cpp
Normal file
155
source/DeadlockDetect.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
|
||||
// DeadlockDetect.cpp
|
||||
|
||||
// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
|
||||
|
||||
#include "Globals.h"
|
||||
#include "DeadlockDetect.h"
|
||||
#include "Root.h"
|
||||
#include "World.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// Number of milliseconds per detection cycle (the timeout used for each wait between two world age checks)
const int CYCLE_MILLISECONDS = 500;

/// Number of seconds for which a world's age may stay unchanged before it is treated as a deadlock
const int DEADLOCK_TIMEOUT_SECONDS = 20;

/// When the number of consecutive cycles with the same world age exceeds this value, it is considered a deadlock.
/// Derived from the timeout, so that changing CYCLE_MILLISECONDS doesn't silently change the detection time.
const int NUM_CYCLES_LIMIT = (DEADLOCK_TIMEOUT_SECONDS * 1000) / CYCLE_MILLISECONDS;  // 40 cycles with the current values
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// Constructs the detector, passing the name "DeadlockDetect" to the cIsThread base.
/// The per-world ages are not recorded here; that happens in Start().
cDeadlockDetect::cDeadlockDetect(void)
	: super("DeadlockDetect")
{
	// Nothing else to initialize
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
bool cDeadlockDetect::Start(void)
|
||||
{
|
||||
// Read the initial world data:
|
||||
class cFillIn :
|
||||
public cWorldListCallback
|
||||
{
|
||||
public:
|
||||
cFillIn(cDeadlockDetect * a_Detect) :
|
||||
m_Detect(a_Detect)
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool Item(cWorld * a_World) override
|
||||
{
|
||||
m_Detect->SetWorldAge(a_World->GetName(), a_World->GetWorldAge());
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
cDeadlockDetect * m_Detect;
|
||||
} FillIn(this);
|
||||
cRoot::Get()->ForEachWorld(FillIn);
|
||||
return super::Start();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void cDeadlockDetect::Stop(void)
|
||||
{
|
||||
m_EvtTerminate.Set();
|
||||
super::Stop();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void cDeadlockDetect::Execute(void)
|
||||
{
|
||||
// Loop until the event is signalled
|
||||
while (m_EvtTerminate.Wait(CYCLE_MILLISECONDS) == cEvent::wrTimeout)
|
||||
{
|
||||
// Check the world ages:
|
||||
class cChecker :
|
||||
public cWorldListCallback
|
||||
{
|
||||
public:
|
||||
cChecker(cDeadlockDetect * a_Detect) :
|
||||
m_Detect(a_Detect)
|
||||
{
|
||||
}
|
||||
|
||||
protected:
|
||||
cDeadlockDetect * m_Detect;
|
||||
|
||||
virtual bool Item(cWorld * a_World) override
|
||||
{
|
||||
m_Detect->CheckWorldAge(a_World->GetName(), a_World->GetWorldAge());
|
||||
return false;
|
||||
}
|
||||
} Checker(this);
|
||||
cRoot::Get()->ForEachWorld(Checker);
|
||||
} // while (should run)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, Int64 a_Age)
|
||||
{
|
||||
m_WorldAges[a_WorldName].m_Age = a_Age;
|
||||
m_WorldAges[a_WorldName].m_NumCyclesSame = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, Int64 a_Age)
|
||||
{
|
||||
WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
|
||||
if (itr == m_WorldAges.end())
|
||||
{
|
||||
ASSERT(!"Unknown world in cDeadlockDetect");
|
||||
return;
|
||||
}
|
||||
if (itr->second.m_Age == a_Age)
|
||||
{
|
||||
itr->second.m_NumCyclesSame += 1;
|
||||
if (itr->second.m_NumCyclesSame > NUM_CYCLES_LIMIT)
|
||||
{
|
||||
DeadlockDetected();
|
||||
return;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
itr->second.m_Age = a_Age;
|
||||
itr->second.m_NumCyclesSame = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void cDeadlockDetect::DeadlockDetected(void)
|
||||
{
|
||||
ASSERT(!"Deadlock detected");
|
||||
|
||||
// TODO: Make a crashdump / coredump
|
||||
|
||||
// Crash the server intentionally:
|
||||
*((int *)0) = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
70
source/DeadlockDetect.h
Normal file
70
source/DeadlockDetect.h
Normal file
@ -0,0 +1,70 @@
|
||||
|
||||
// DeadlockDetect.h
|
||||
|
||||
// Declares the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
|
||||
|
||||
/*
|
||||
This class simply monitors each world's m_WorldAge, which is expected to grow on each tick.
|
||||
If the world age doesn't grow for several seconds, it's either because the server is super-overloaded,
|
||||
or because the world tick thread hangs in a deadlock. We presume the latter and therefore kill the server.
|
||||
Once we learn to write crashdumps programmatically, we should do so just before killing, to enable debugging.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "OSSupport/IsThread.h"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class cDeadlockDetect :
|
||||
public cIsThread
|
||||
{
|
||||
typedef cIsThread super;
|
||||
|
||||
public:
|
||||
cDeadlockDetect(void);
|
||||
|
||||
/// Starts the detection. Hides cIsThread's Start, because we need some initialization
|
||||
bool Start(void);
|
||||
|
||||
/// Stops the detection. Hides cIsThread's Stop, because we need to signal m_EvtTerminate
|
||||
void Stop(void);
|
||||
|
||||
protected:
|
||||
struct sWorldAge
|
||||
{
|
||||
/// Last m_WorldAge that has been detected in this world
|
||||
Int64 m_Age;
|
||||
|
||||
/// Number of cycles for which the age has been the same
|
||||
int m_NumCyclesSame;
|
||||
} ;
|
||||
|
||||
/// Maps world name -> sWorldAge
|
||||
typedef std::map<AString, sWorldAge> WorldAges;
|
||||
|
||||
WorldAges m_WorldAges;
|
||||
|
||||
cEvent m_EvtTerminate;
|
||||
|
||||
|
||||
// cIsThread overrides:
|
||||
virtual void Execute(void) override;
|
||||
|
||||
/// Sets the initial world age
|
||||
void SetWorldAge(const AString & a_WorldName, Int64 a_Age);
|
||||
|
||||
/// Checks if the world's age has changed, updates the world's stats; calls DeadlockDetected() if deadlock detected
|
||||
void CheckWorldAge(const AString & a_WorldName, Int64 a_Age);
|
||||
|
||||
/// Called when a deadlock is detected. Aborts the server.
|
||||
void DeadlockDetected(void);
|
||||
} ;
|
||||
|
||||
|
||||
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "Chunk.h"
|
||||
#include "Protocol/ProtocolRecognizer.h" // for protocol version constants
|
||||
#include "CommandOutput.h"
|
||||
#include "DeadlockDetect.h"
|
||||
|
||||
#include "../iniFile/iniFile.h"
|
||||
|
||||
@ -90,6 +91,7 @@ void cRoot::InputThread(void * a_Params)
|
||||
|
||||
void cRoot::Start(void)
|
||||
{
|
||||
cDeadlockDetect dd;
|
||||
delete m_Log;
|
||||
m_Log = new cMCLogger();
|
||||
|
||||
@ -162,6 +164,9 @@ void cRoot::Start(void)
|
||||
LOG("Starting worlds...");
|
||||
StartWorlds();
|
||||
|
||||
LOG("Starting deadlock detector...");
|
||||
dd.Start();
|
||||
|
||||
LOG("Starting server...");
|
||||
m_Server->Start();
|
||||
|
||||
@ -183,17 +188,21 @@ void cRoot::Start(void)
|
||||
|
||||
// Deallocate stuffs
|
||||
LOG("Shutting down server...");
|
||||
m_Server->Shutdown(); // This waits for threads to stop and d/c clients
|
||||
m_Server->Shutdown();
|
||||
|
||||
LOG("Shutting down deadlock detector...");
|
||||
dd.Stop();
|
||||
|
||||
LOG("Stopping world threads...");
|
||||
StopWorlds();
|
||||
|
||||
LOG("Stopping authenticator...");
|
||||
m_Authenticator.Stop();
|
||||
|
||||
|
||||
LOG("Freeing MonsterConfig...");
|
||||
delete m_MonsterConfig; m_MonsterConfig = 0;
|
||||
delete m_MonsterConfig; m_MonsterConfig = NULL;
|
||||
LOG("Stopping WebAdmin...");
|
||||
delete m_WebAdmin; m_WebAdmin = 0;
|
||||
delete m_WebAdmin; m_WebAdmin = NULL;
|
||||
LOG("Unloading recipes...");
|
||||
delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
|
||||
delete m_CraftingRecipes; m_CraftingRecipes = NULL;
|
||||
|
Loading…
Reference in New Issue
Block a user