From 50205bc4df3c272b88a5edd81a35ac0aca8213d5 Mon Sep 17 00:00:00 2001
From: madmaxoft
Date: Wed, 14 Aug 2013 22:39:12 +0200
Subject: [PATCH] Added simple deadlock detection code.

This will assert and then deliberately crash the server once a deadlock is detected.
For detection, only the world tick threads are considered, cWorld's m_WorldAge is checked periodically and if it doesn't increment for several seconds, a deadlock is reported.
---
 VC2008/MCServer.vcproj    |   8 ++
 source/DeadlockDetect.cpp | 155 ++++++++++++++++++++++++++++++++++++++
 source/DeadlockDetect.h   |  70 +++++++++++++++++
 source/Root.cpp           |  17 ++++-
 4 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 source/DeadlockDetect.cpp
 create mode 100644 source/DeadlockDetect.h

diff --git a/VC2008/MCServer.vcproj b/VC2008/MCServer.vcproj
index 4d850626d..0dcdeb3db 100644
--- a/VC2008/MCServer.vcproj
+++ b/VC2008/MCServer.vcproj
@@ -390,6 +390,14 @@
 				RelativePath="..\source\Cuboid.h"
 				>
+
+
+
+
diff --git a/source/DeadlockDetect.cpp b/source/DeadlockDetect.cpp
new file mode 100644
index 000000000..960038f81
--- /dev/null
+++ b/source/DeadlockDetect.cpp
@@ -0,0 +1,155 @@
+
+// DeadlockDetect.cpp
+
+// Implements the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
+
+#include "Globals.h"
+#include "DeadlockDetect.h"
+#include "Root.h"
+#include "World.h"
+
+
+
+
+
+/// Number of milliseconds per cycle
+const int CYCLE_MILLISECONDS = 500;
+
+/// When the number of cycles for the same world age hits this value, it is considered a deadlock
+const int NUM_CYCLES_LIMIT = 40; // 40 = twenty seconds
+
+
+
+
+
+cDeadlockDetect::cDeadlockDetect(void) :
+	super("DeadlockDetect")
+{
+}
+
+
+
+
+
+bool cDeadlockDetect::Start(void)
+{
+	// Read the initial world data:
+	class cFillIn :
+		public cWorldListCallback
+	{
+	public:
+		cFillIn(cDeadlockDetect * a_Detect) :
+			m_Detect(a_Detect)
+		{
+		}
+
+		virtual bool Item(cWorld * a_World) override
+		{
+			m_Detect->SetWorldAge(a_World->GetName(), a_World->GetWorldAge());
+			return false;
+		}
+
+	protected:
+		cDeadlockDetect * m_Detect;
+	} FillIn(this);
+	cRoot::Get()->ForEachWorld(FillIn);
+	return super::Start();
+}
+
+
+
+
+
+void cDeadlockDetect::Stop(void)
+{
+	m_EvtTerminate.Set();
+	super::Stop();
+}
+
+
+
+
+
+void cDeadlockDetect::Execute(void)
+{
+	// Loop until the event is signalled
+	while (m_EvtTerminate.Wait(CYCLE_MILLISECONDS) == cEvent::wrTimeout)
+	{
+		// Check the world ages:
+		class cChecker :
+			public cWorldListCallback
+		{
+		public:
+			cChecker(cDeadlockDetect * a_Detect) :
+				m_Detect(a_Detect)
+			{
+			}
+
+		protected:
+			cDeadlockDetect * m_Detect;
+
+			virtual bool Item(cWorld * a_World) override
+			{
+				m_Detect->CheckWorldAge(a_World->GetName(), a_World->GetWorldAge());
+				return false;
+			}
+		} Checker(this);
+		cRoot::Get()->ForEachWorld(Checker);
+	} // while (should run)
+}
+
+
+
+
+
+void cDeadlockDetect::SetWorldAge(const AString & a_WorldName, Int64 a_Age)
+{
+	m_WorldAges[a_WorldName].m_Age = a_Age;
+	m_WorldAges[a_WorldName].m_NumCyclesSame = 0;
+}
+
+
+
+
+
+void cDeadlockDetect::CheckWorldAge(const AString & a_WorldName, Int64 a_Age)
+{
+	WorldAges::iterator itr = m_WorldAges.find(a_WorldName);
+	if (itr == m_WorldAges.end())
+	{
+		ASSERT(!"Unknown world in cDeadlockDetect");
+		return;
+	}
+	if (itr->second.m_Age == a_Age)
+	{
+		itr->second.m_NumCyclesSame += 1;
+		if (itr->second.m_NumCyclesSame > NUM_CYCLES_LIMIT)
+		{
+			DeadlockDetected();
+			return;
+		}
+	}
+	else
+	{
+		itr->second.m_Age = a_Age;
+		itr->second.m_NumCyclesSame = 0;
+	}
+}
+
+
+
+
+
+void cDeadlockDetect::DeadlockDetected(void)
+{
+	ASSERT(!"Deadlock detected");
+
+	// TODO: Make a crashdump / coredump
+
+	// Crash the server intentionally; the volatile qualifier keeps the compiler from eliding the null write:
+	*((volatile int *)0) = 0;
+}
+
+
+
+
diff --git a/source/DeadlockDetect.h b/source/DeadlockDetect.h
new file mode 100644
index 000000000..bbd76826a
--- /dev/null
+++ b/source/DeadlockDetect.h
@@ -0,0 +1,70 @@
+
+// DeadlockDetect.h
+
+// Declares the cDeadlockDetect class that tries to detect deadlocks and aborts the server when it detects one
+
+/*
+This class simply monitors each world's m_WorldAge, which is expected to grow on each tick.
+If the world age doesn't grow for several seconds, it's either because the server is super-overloaded,
+or because the world tick thread hangs in a deadlock. We presume the latter and therefore kill the server.
+Once we learn to write crashdumps programmatically, we should do so just before killing, to enable debugging.
+*/
+
+
+
+#pragma once
+
+#include "OSSupport/IsThread.h"
+
+
+
+
+
+class cDeadlockDetect :
+	public cIsThread
+{
+	typedef cIsThread super;
+
+public:
+	cDeadlockDetect(void);
+
+	/// Starts the detection. Hides cIsThread's Start, because we need some initialization
+	bool Start(void);
+
+	/// Stops the detection. Hides cIsThread's Stop, because we need to signal m_EvtTerminate
+	void Stop(void);
+
+protected:
+	struct sWorldAge
+	{
+		/// Last m_WorldAge that has been detected in this world
+		Int64 m_Age;
+
+		/// Number of cycles for which the age has been the same
+		int m_NumCyclesSame;
+	} ;
+
+	/// Maps world name -> sWorldAge
+	typedef std::map<AString, sWorldAge> WorldAges;
+
+	WorldAges m_WorldAges;
+
+	cEvent m_EvtTerminate;
+
+
+	// cIsThread overrides:
+	virtual void Execute(void) override;
+
+	/// Sets the initial world age
+	void SetWorldAge(const AString & a_WorldName, Int64 a_Age);
+
+	/// Checks if the world's age has changed, updates the world's stats; calls DeadlockDetected() if deadlock detected
+	void CheckWorldAge(const AString & a_WorldName, Int64 a_Age);
+
+	/// Called when a deadlock is detected. Aborts the server.
+	void DeadlockDetected(void);
+} ;
+
+
+
+
diff --git a/source/Root.cpp b/source/Root.cpp
index 166932cf2..07de0775c 100644
--- a/source/Root.cpp
+++ b/source/Root.cpp
@@ -16,6 +16,7 @@
 #include "Chunk.h"
 #include "Protocol/ProtocolRecognizer.h" // for protocol version constants
 #include "CommandOutput.h"
+#include "DeadlockDetect.h"
 
 #include "../iniFile/iniFile.h"
 
@@ -90,6 +91,7 @@ void cRoot::InputThread(void * a_Params)
 
 void cRoot::Start(void)
 {
+	cDeadlockDetect dd;
 	delete m_Log;
 	m_Log = new cMCLogger();
 
@@ -162,6 +164,9 @@ void cRoot::Start(void)
 		LOG("Starting worlds...");
 		StartWorlds();
 
+		LOG("Starting deadlock detector...");
+		dd.Start();
+
 		LOG("Starting server...");
 		m_Server->Start();
 
@@ -183,17 +188,21 @@ void cRoot::Start(void)
 
 		// Deallocate stuffs
 		LOG("Shutting down server...");
-		m_Server->Shutdown(); // This waits for threads to stop and d/c clients
+		m_Server->Shutdown();
+
+		LOG("Shutting down deadlock detector...");
+		dd.Stop();
+
 		LOG("Stopping world threads...");
 		StopWorlds();
+
 		LOG("Stopping authenticator...");
 		m_Authenticator.Stop();
 
-
 		LOG("Freeing MonsterConfig...");
-		delete m_MonsterConfig; m_MonsterConfig = 0;
+		delete m_MonsterConfig; m_MonsterConfig = NULL;
 		LOG("Stopping WebAdmin...");
-		delete m_WebAdmin; m_WebAdmin = 0;
+		delete m_WebAdmin; m_WebAdmin = NULL;
 		LOG("Unloading recipes...");
 		delete m_FurnaceRecipe; m_FurnaceRecipe = NULL;
 		delete m_CraftingRecipes; m_CraftingRecipes = NULL;