Added simple deadlock detection code.
This will assert and then deliberately crash the server once a deadlock is detected. For detection, only the world tick threads are considered: cWorld's m_WorldAge is checked periodically, and if it doesn't increment for several seconds, a deadlock is reported.
This commit is contained in:
parent
f93d13c419
commit
50205bc4df
@ -390,6 +390,14 @@
|
|||||||
RelativePath="..\source\Cuboid.h"
|
RelativePath="..\source\Cuboid.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\source\DeadlockDetect.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\source\DeadlockDetect.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\source\Defines.h"
|
RelativePath="..\source\Defines.h"
|
||||||
>
|
>
|
||||||
|
155
source/DeadlockDetect.cpp
Normal file
155
source/DeadlockDetect.cpp
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
|
||||||
|
// DeadlockDetect.cpp
|
||||||
|
|
||||||
|
// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
|
||||||
|
|
||||||
|
#include "Globals.h"
|
||||||
|
#include "DeadlockDetect.h"
|
||||||
|
#include "Root.h"
|
||||||
|
#include "World.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Number of milliseconds per cycle (how long Execute() waits on the terminate event between two checks)
const int CYCLE_MILLISECONDS = 500;

/// How long a world's age may stay unchanged before it is considered a deadlock, in milliseconds
const int DEADLOCK_TIMEOUT_MILLISECONDS = 20 * 1000;  // twenty seconds

/// When the number of cycles for the same world age exceeds this value, it is considered a deadlock
/// Derived from the timeout, so that changing CYCLE_MILLISECONDS doesn't silently change the timeout
const int NUM_CYCLES_LIMIT = DEADLOCK_TIMEOUT_MILLISECONDS / CYCLE_MILLISECONDS;  // 40 with the values above
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/// Creates the detector; "DeadlockDetect" is the name handed to the cIsThread base class
cDeadlockDetect::cDeadlockDetect(void) : super("DeadlockDetect") {}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
bool cDeadlockDetect::Start(void)
|
||||||
|
{
|
||||||
|
// Read the initial world data:
|
||||||
|
class cFillIn :
|
||||||
|
public cWorldListCallback
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
cFillIn(cDeadlockDetect * a_Detect) :
|
||||||
|
m_Detect(a_Detect)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual bool Item(cWorld * a_World) override
|
||||||
|
{
|
||||||
|
m_Detect->SetWorldAge(a_World->GetName(), a_World->GetWorldAge());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
cDeadlockDetect * m_Detect;
|
||||||
|
} FillIn(this);
|
||||||
|
cRoot::Get()->ForEachWorld(FillIn);
|
||||||
|
return super::Start();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cDeadlockDetect::Stop(void)
|
||||||
|
{
|
||||||
|
m_EvtTerminate.Set();
|
||||||
|
super::Stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cDeadlockDetect::Execute(void)
|
||||||
|
{
|
||||||
|
// Loop until the event is signalled
|
||||||
|
while (m_EvtTerminate.Wait(CYCLE_MILLISECONDS) == cEvent::wrTimeout)
|
||||||
|
{
|
||||||
|
// Check the world ages:
|
||||||
|
class cChecker :
|
||||||
|
public cWorldListCallback
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
cChecker(cDeadlockDetect * a_Detect) :
|
||||||
|
m_Detect(a_Detect)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
cDeadlockDetect * m_Detect;
|
||||||
|
|
||||||
|
virtual bool Item(cWorld * a_World) override
|
||||||
|
{
|
||||||
|
m_Detect->CheckWorldAge(a_World->GetName(), a_World->GetWorldAge());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} Checker(this);
|
||||||
|
cRoot::Get()->ForEachWorld(Checker);
|
||||||
|
} // while (should run)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, Int64 a_Age)
|
||||||
|
{
|
||||||
|
m_WorldAges[a_WorldName].m_Age = a_Age;
|
||||||
|
m_WorldAges[a_WorldName].m_NumCyclesSame = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, Int64 a_Age)
|
||||||
|
{
|
||||||
|
WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
|
||||||
|
if (itr == m_WorldAges.end())
|
||||||
|
{
|
||||||
|
ASSERT(!"Unknown world in cDeadlockDetect");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (itr->second.m_Age == a_Age)
|
||||||
|
{
|
||||||
|
itr->second.m_NumCyclesSame += 1;
|
||||||
|
if (itr->second.m_NumCyclesSame > NUM_CYCLES_LIMIT)
|
||||||
|
{
|
||||||
|
DeadlockDetected();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
itr->second.m_Age = a_Age;
|
||||||
|
itr->second.m_NumCyclesSame = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cDeadlockDetect::DeadlockDetected(void)
|
||||||
|
{
|
||||||
|
ASSERT(!"Deadlock detected");
|
||||||
|
|
||||||
|
// TODO: Make a crashdump / coredump
|
||||||
|
|
||||||
|
// Crash the server intentionally:
|
||||||
|
*((int *)0) = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
70
source/DeadlockDetect.h
Normal file
70
source/DeadlockDetect.h
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
|
||||||
|
// DeadlockDetect.h
|
||||||
|
|
||||||
|
// Declares the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
|
||||||
|
|
||||||
|
/*
|
||||||
|
This class simply monitors each world's m_WorldAge, which is expected to grow on each tick.
|
||||||
|
If the world age doesn't grow for several seconds, it's either because the server is super-overloaded,
|
||||||
|
or because the world tick thread hangs in a deadlock. We presume the latter and therefore kill the server.
|
||||||
|
Once we learn to write crashdumps programmatically, we should do so just before killing, to enable debugging.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "OSSupport/IsThread.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class cDeadlockDetect :
|
||||||
|
public cIsThread
|
||||||
|
{
|
||||||
|
typedef cIsThread super;
|
||||||
|
|
||||||
|
public:
|
||||||
|
cDeadlockDetect(void);
|
||||||
|
|
||||||
|
/// Starts the detection. Hides cIsThread's Start, because we need some initialization
|
||||||
|
bool Start(void);
|
||||||
|
|
||||||
|
/// Stops the detection. Hides cIsThread's Stop, because we need to signal m_EvtTerminate
|
||||||
|
void Stop(void);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
struct sWorldAge
|
||||||
|
{
|
||||||
|
/// Last m_WorldAge that has been detected in this world
|
||||||
|
Int64 m_Age;
|
||||||
|
|
||||||
|
/// Number of cycles for which the age has been the same
|
||||||
|
int m_NumCyclesSame;
|
||||||
|
} ;
|
||||||
|
|
||||||
|
/// Maps world name -> sWorldAge
|
||||||
|
typedef std::map<AString, sWorldAge> WorldAges;
|
||||||
|
|
||||||
|
WorldAges m_WorldAges;
|
||||||
|
|
||||||
|
cEvent m_EvtTerminate;
|
||||||
|
|
||||||
|
|
||||||
|
// cIsThread overrides:
|
||||||
|
virtual void Execute(void) override;
|
||||||
|
|
||||||
|
/// Sets the initial world age
|
||||||
|
void SetWorldAge(const AString & a_WorldName, Int64 a_Age);
|
||||||
|
|
||||||
|
/// Checks if the world's age has changed, updates the world's stats; calls DeadlockDetected() if deadlock detected
|
||||||
|
void CheckWorldAge(const AString & a_WorldName, Int64 a_Age);
|
||||||
|
|
||||||
|
/// Called when a deadlock is detected. Aborts the server.
|
||||||
|
void DeadlockDetected(void);
|
||||||
|
} ;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -16,6 +16,7 @@
|
|||||||
#include "Chunk.h"
|
#include "Chunk.h"
|
||||||
#include "Protocol/ProtocolRecognizer.h" // for protocol version constants
|
#include "Protocol/ProtocolRecognizer.h" // for protocol version constants
|
||||||
#include "CommandOutput.h"
|
#include "CommandOutput.h"
|
||||||
|
#include "DeadlockDetect.h"
|
||||||
|
|
||||||
#include "../iniFile/iniFile.h"
|
#include "../iniFile/iniFile.h"
|
||||||
|
|
||||||
@ -90,6 +91,7 @@ void cRoot::InputThread(void * a_Params)
|
|||||||
|
|
||||||
void cRoot::Start(void)
|
void cRoot::Start(void)
|
||||||
{
|
{
|
||||||
|
cDeadlockDetect dd;
|
||||||
delete m_Log;
|
delete m_Log;
|
||||||
m_Log = new cMCLogger();
|
m_Log = new cMCLogger();
|
||||||
|
|
||||||
@ -162,6 +164,9 @@ void cRoot::Start(void)
|
|||||||
LOG("Starting worlds...");
|
LOG("Starting worlds...");
|
||||||
StartWorlds();
|
StartWorlds();
|
||||||
|
|
||||||
|
LOG("Starting deadlock detector...");
|
||||||
|
dd.Start();
|
||||||
|
|
||||||
LOG("Starting server...");
|
LOG("Starting server...");
|
||||||
m_Server->Start();
|
m_Server->Start();
|
||||||
|
|
||||||
@ -183,17 +188,21 @@ void cRoot::Start(void)
|
|||||||
|
|
||||||
// Deallocate stuffs
|
// Deallocate stuffs
|
||||||
LOG("Shutting down server...");
|
LOG("Shutting down server...");
|
||||||
m_Server->Shutdown(); // This waits for threads to stop and d/c clients
|
m_Server->Shutdown();
|
||||||
|
|
||||||
|
LOG("Shutting down deadlock detector...");
|
||||||
|
dd.Stop();
|
||||||
|
|
||||||
LOG("Stopping world threads...");
|
LOG("Stopping world threads...");
|
||||||
StopWorlds();
|
StopWorlds();
|
||||||
|
|
||||||
LOG("Stopping authenticator...");
|
LOG("Stopping authenticator...");
|
||||||
m_Authenticator.Stop();
|
m_Authenticator.Stop();
|
||||||
|
|
||||||
|
|
||||||
LOG("Freeing MonsterConfig...");
|
LOG("Freeing MonsterConfig...");
|
||||||
delete m_MonsterConfig; m_MonsterConfig = 0;
|
delete m_MonsterConfig; m_MonsterConfig = NULL;
|
||||||
LOG("Stopping WebAdmin...");
|
LOG("Stopping WebAdmin...");
|
||||||
delete m_WebAdmin; m_WebAdmin = 0;
|
delete m_WebAdmin; m_WebAdmin = NULL;
|
||||||
LOG("Unloading recipes...");
|
LOG("Unloading recipes...");
|
||||||
delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
|
delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
|
||||||
delete m_CraftingRecipes; m_CraftingRecipes = NULL;
|
delete m_CraftingRecipes; m_CraftingRecipes = NULL;
|
||||||
|
Loading…
Reference in New Issue
Block a user