1
0
Fork 0

DeadlockDetect now lists some tracked CS's stats.

This commit is contained in:
Mattes D 2017-01-17 22:38:04 +01:00
parent 49e05d8cfe
commit 7cc3fb098d
18 changed files with 274 additions and 69 deletions

View File

@ -19,6 +19,7 @@ extern "C"
#include "LuaJson.h"
#include "../Entities/Entity.h"
#include "../BlockEntities/BlockEntity.h"
#include "../DeadlockDetect.h"
@ -2225,6 +2226,24 @@ void cLuaState::LogApiCallParamFailure(const char * a_FnName, const char * a_Par
void cLuaState::TrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect)
{
a_DeadlockDetect.TrackCriticalSection(m_CS, Printf("cLuaState %s", m_SubsystemName.c_str()));
}
void cLuaState::UntrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect)
{
a_DeadlockDetect.UntrackCriticalSection(m_CS);
}
int cLuaState::ReportFnCallErrors(lua_State * a_LuaState)
{
LOGWARNING("LUA: %s", lua_tostring(a_LuaState, -1));

View File

@ -46,6 +46,7 @@ class cLuaServerHandle;
class cLuaTCPLink;
class cLuaUDPEndpoint;
class cPluginLua;
class cDeadlockDetect;
@ -822,6 +823,12 @@ public:
logs the stack trace and stack values. */
void LogApiCallParamFailure(const char * a_FnName, const char * a_ParamNames);
/** Adds this object's CS to the DeadlockDetect's tracked CSs. */
void TrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect);
/** Removes this object's CS from the DeadlockDetect's tracked CSs. */
void UntrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect);
protected:
cCriticalSection m_CS;

View File

@ -33,10 +33,12 @@ extern "C"
////////////////////////////////////////////////////////////////////////////////
// cPluginLua:
cPluginLua::cPluginLua(const AString & a_PluginDirectory) :
cPluginLua::cPluginLua(const AString & a_PluginDirectory, cDeadlockDetect & a_DeadlockDetect) :
cPlugin(a_PluginDirectory),
m_LuaState(Printf("plugin %s", a_PluginDirectory.c_str()))
m_LuaState(Printf("plugin %s", a_PluginDirectory.c_str())),
m_DeadlockDetect(a_DeadlockDetect)
{
m_LuaState.TrackInDeadlockDetect(a_DeadlockDetect);
}
@ -46,6 +48,7 @@ cPluginLua::cPluginLua(const AString & a_PluginDirectory) :
cPluginLua::~cPluginLua()
{
Close();
m_LuaState.UntrackInDeadlockDetect(m_DeadlockDetect);
}

View File

@ -62,7 +62,7 @@ public:
cPluginLua(const AString & a_PluginDirectory);
cPluginLua(const AString & a_PluginDirectory, cDeadlockDetect & a_DeadlockDetect);
~cPluginLua();
virtual void OnDisable(void) override;
@ -179,6 +179,9 @@ protected:
/** Hooks that the plugin has registered. */
cHookMap m_HookMap;
/** The DeadlockDetect object to which the plugin's CS is tracked. */
cDeadlockDetect & m_DeadlockDetect;
/** Releases all Lua references, notifies and removes all m_Resettables[] and closes the m_LuaState. */
void Close(void);

View File

@ -32,8 +32,9 @@ cPluginManager * cPluginManager::Get(void)
cPluginManager::cPluginManager(void) :
m_bReloadPlugins(false)
cPluginManager::cPluginManager(cDeadlockDetect & a_DeadlockDetect) :
m_bReloadPlugins(false),
m_DeadlockDetect(a_DeadlockDetect)
{
}
@ -98,7 +99,7 @@ void cPluginManager::RefreshPluginList(void)
} // for plugin - m_Plugins[]
if (!hasFound)
{
m_Plugins.push_back(std::make_shared<cPluginLua>(folder));
m_Plugins.push_back(std::make_shared<cPluginLua>(folder, m_DeadlockDetect));
}
} // for folder - Folders[]
}
@ -602,25 +603,28 @@ bool cPluginManager::CallHookEntityChangedWorld(cEntity & a_Entity, cWorld & a_W
bool cPluginManager::CallHookExecuteCommand(cPlayer * a_Player, const AStringVector & a_Split, const AString & a_EntireCommand, CommandResult & a_Result)
{
// Output the command being executed to log (for troubleshooting deadlocks-in-commands):
auto world = a_Player->GetWorld();
AString worldName;
Int64 worldAge;
if (world != nullptr)
if (a_Player != nullptr)
{
worldName = world->GetName();
worldAge = world->GetWorldAge();
auto world = a_Player->GetWorld();
AString worldName;
Int64 worldAge;
if (world != nullptr)
{
worldName = world->GetName();
worldAge = world->GetWorldAge();
}
else
{
worldName = "<no world>";
worldAge = 0;
}
LOG("Player %s is executing command \"%s\" in world \"%s\" at world age %lld.",
a_Player->GetName().c_str(),
a_EntireCommand.c_str(),
worldName.c_str(),
worldAge
);
}
else
{
worldName = "<no world>";
worldAge = 0;
}
LOG("Player %s is executing command \"%s\" in world \"%s\" at world age %lld.",
a_Player->GetName().c_str(),
a_EntireCommand.c_str(),
worldName.c_str(),
worldAge
);
FIND_HOOK(HOOK_EXECUTE_COMMAND);
VERIFY_HOOK;

View File

@ -26,6 +26,7 @@ class cPlugin;
class cProjectileEntity;
class cWorld;
class cSettingsRepositoryInterface;
class cDeadlockDetect;
struct TakeDamageInfo;
typedef SharedPtr<cPlugin> cPluginPtr;
@ -413,8 +414,11 @@ private:
/** If set to true, all the plugins will be reloaded within the next call to Tick(). */
bool m_bReloadPlugins;
/** The deadlock detect in which all plugins should track their CSs. */
cDeadlockDetect & m_DeadlockDetect;
cPluginManager();
cPluginManager(cDeadlockDetect & a_DeadlockDetect);
virtual ~cPluginManager();
/** Reloads all plugins, defaulting to settings.ini for settings location */

View File

@ -19,6 +19,7 @@
#include "SetChunkData.h"
#include "Blocks/ChunkInterface.h"
#include "Entities/Pickup.h"
#include "DeadlockDetect.h"
#ifndef _WIN32
#include <cstdlib> // abs
@ -2783,6 +2784,24 @@ void cChunkMap::SetChunkAlwaysTicked(int a_ChunkX, int a_ChunkZ, bool a_AlwaysTi
void cChunkMap::TrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect, const AString & a_WorldName)
{
a_DeadlockDetect.TrackCriticalSection(m_CSChunks, Printf("World %s chunkmap", a_WorldName.c_str()));
}
void cChunkMap::UntrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect)
{
a_DeadlockDetect.UntrackCriticalSection(m_CSChunks);
}
void cChunkMap::FastSetBlock(int a_BlockX, int a_BlockY, int a_BlockZ, BLOCKTYPE a_BlockType, NIBBLETYPE a_BlockMeta)
{
int ChunkX, ChunkZ, X = a_BlockX, Y = a_BlockY, Z = a_BlockZ;

View File

@ -38,6 +38,7 @@ class cMobCensus;
class cMobSpawner;
class cSetChunkData;
class cBoundingBox;
class cDeadlockDetect;
typedef std::list<cClientHandle *> cClientHandleList;
typedef cChunk * cChunkPtr;
@ -411,6 +412,12 @@ public:
as at least one requests is active the chunk will be ticked). */
void SetChunkAlwaysTicked(int a_ChunkX, int a_ChunkZ, bool a_AlwaysTicked);
/** Adds this chunkmap's CS to the DeadlockDetect's tracked CSs. */
void TrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect, const AString & a_WorldName);
/** Removes this chunkmap's CS from the DeadlockDetect's tracked CSs. */
void UntrackInDeadlockDetect(cDeadlockDetect & a_DeadlockDetect);
private:
// The chunks can manipulate neighbors while in their Tick() method, using LockedGetBlock() and LockedSetBlock()

View File

@ -30,6 +30,27 @@ cDeadlockDetect::cDeadlockDetect(void) :
cDeadlockDetect::~cDeadlockDetect()
{
// Check that all tracked CSs have been removed, report any remaining:
cCSLock lock(m_CS);
if (!m_TrackedCriticalSections.empty())
{
LOGWARNING("DeadlockDetect: Some CS objects (%u) haven't been removed from tracking", static_cast<unsigned>(m_TrackedCriticalSections.size()));
for (const auto & tcs: m_TrackedCriticalSections)
{
LOGWARNING(" CS %p / %s",
static_cast<void *>(tcs.first),
tcs.second.c_str()
);
}
}
}
bool cDeadlockDetect::Start(int a_IntervalSec)
{
m_IntervalSec = a_IntervalSec;
@ -61,6 +82,33 @@ bool cDeadlockDetect::Start(int a_IntervalSec)
void cDeadlockDetect::TrackCriticalSection(cCriticalSection & a_CS, const AString & a_Name)
{
cCSLock lock(m_CS);
m_TrackedCriticalSections.emplace_back(std::make_pair(&a_CS, a_Name));
}
void cDeadlockDetect::UntrackCriticalSection(cCriticalSection & a_CS)
{
cCSLock lock(m_CS);
for (auto itr = m_TrackedCriticalSections.begin(), end = m_TrackedCriticalSections.end(); itr != end; ++itr)
{
if (itr->first == &a_CS)
{
m_TrackedCriticalSections.erase(itr);
return;
}
}
}
void cDeadlockDetect::Execute(void)
{
// Loop until the signal to terminate:
@ -140,6 +188,7 @@ void cDeadlockDetect::DeadlockDetected(const AString & a_WorldName, Int64 a_Worl
LOGERROR("Deadlock detected: world %s has been stuck at age %lld. Aborting the server.",
a_WorldName.c_str(), static_cast<long long>(a_WorldAge)
);
ListTrackedCSs();
ASSERT(!"Deadlock detected");
abort();
}
@ -147,3 +196,19 @@ void cDeadlockDetect::DeadlockDetected(const AString & a_WorldName, Int64 a_Worl
void cDeadlockDetect::ListTrackedCSs(void)
{
cCSLock lock(m_CS);
for (const auto & cs: m_TrackedCriticalSections)
{
LOG("CS at %p, %s: RecursionCount = %d, ThreadIDHash = %04llx",
static_cast<void *>(cs.first), cs.second.c_str(),
cs.first->m_RecursionCount, static_cast<UInt64>(std::hash<std::thread::id>()(cs.first->m_OwningThreadID))
);
}
}

View File

@ -27,10 +27,20 @@ class cDeadlockDetect :
public:
cDeadlockDetect(void);
~cDeadlockDetect();
/** Starts the detection. Hides cIsThread's Start, because we need some initialization */
bool Start(int a_IntervalSec);
/** Adds the critical section for tracking.
Tracked CSs are listed, together with ownership details, when a deadlock is detected.
A tracked CS must be untracked before it is destroyed.
a_Name is an arbitrary name that is listed along with the CS in the output. */
void TrackCriticalSection(cCriticalSection & a_CS, const AString & a_Name);
/** Removes the CS from the tracking. */
void UntrackCriticalSection(cCriticalSection & a_CS);
protected:
struct sWorldAge
{
@ -44,6 +54,13 @@ protected:
/** Maps world name -> sWorldAge */
typedef std::map<AString, sWorldAge> WorldAges;
/** Protects m_TrackedCriticalSections from multithreaded access. */
cCriticalSection m_CS;
/** CriticalSections that are tracked (their status output on deadlock).
Protected against multithreaded access by m_CS. */
std::vector<std::pair<cCriticalSection *, AString>> m_TrackedCriticalSections;
WorldAges m_WorldAges;
/** Number of secods for which the ages must be the same for the detection to trigger */
@ -63,6 +80,9 @@ protected:
a_WorldName is the name of the world whose age has triggered the detection.
a_WorldAge is the age (in ticks) in which the world is stuck. */
NORETURN void DeadlockDetected(const AString & a_WorldName, Int64 a_WorldAge);
/** Outputs a listing of the tracked CSs, together with their name and state. */
void ListTrackedCSs();
} ;

View File

@ -9,12 +9,10 @@
////////////////////////////////////////////////////////////////////////////////
// cCriticalSection:
#ifdef _DEBUG
cCriticalSection::cCriticalSection()
cCriticalSection::cCriticalSection():
m_RecursionCount(0)
{
m_IsLocked = 0;
}
#endif // _DEBUG
@ -24,10 +22,8 @@ void cCriticalSection::Lock()
{
m_Mutex.lock();
#ifdef _DEBUG
m_IsLocked += 1;
m_OwningThreadID = std::this_thread::get_id();
#endif // _DEBUG
m_RecursionCount += 1;
m_OwningThreadID = std::this_thread::get_id();
}
@ -36,10 +32,8 @@ void cCriticalSection::Lock()
void cCriticalSection::Unlock()
{
#ifdef _DEBUG
ASSERT(m_IsLocked > 0);
m_IsLocked -= 1;
#endif // _DEBUG
ASSERT(IsLockedByCurrentThread());
m_RecursionCount -= 1;
m_Mutex.unlock();
}
@ -48,10 +42,9 @@ void cCriticalSection::Unlock()
#ifdef _DEBUG
bool cCriticalSection::IsLocked(void)
{
return (m_IsLocked > 0);
return (m_RecursionCount > 0);
}
@ -60,9 +53,8 @@ bool cCriticalSection::IsLocked(void)
bool cCriticalSection::IsLockedByCurrentThread(void)
{
return ((m_IsLocked > 0) && (m_OwningThreadID == std::this_thread::get_id()));
return ((m_RecursionCount > 0) && (m_OwningThreadID == std::this_thread::get_id()));
}
#endif // _DEBUG

View File

@ -9,27 +9,40 @@
class cCriticalSection
{
friend class cDeadlockDetect; // Allow the DeadlockDetect to read the internals, so that it may output some statistics
public:
void Lock(void);
void Unlock(void);
// IsLocked / IsLockedByCurrentThread are only used in ASSERT statements, but because of the changes with ASSERT they must always be defined
// The fake versions (in Release) will not effect the program in any way
#ifdef _DEBUG
cCriticalSection(void);
bool IsLocked(void);
bool IsLockedByCurrentThread(void);
#else
bool IsLocked(void) { return false; }
bool IsLockedByCurrentThread(void) { return false; }
#endif // _DEBUG
cCriticalSection(void);
/** Returns true if the CS is currently locked.
Note that since it relies on the m_RecursionCount value, it is inherently thread-unsafe, prone to false positives.
Also, due to multithreading, the state can change between this when function is evaluated and the returned value is used.
To be used in ASSERT(IsLocked()) only. */
bool IsLocked(void);
/** Returns true if the CS is currently locked by the thread calling this function.
Note that since it relies on the m_RecursionCount value, it is inherently thread-unsafe, prone to false positives.
Also, due to multithreading, the state can change between this when function is evaluated and the returned value is used.
To be used in ASSERT(IsLockedByCurrentThread()) only. */
bool IsLockedByCurrentThread(void);
private:
#ifdef _DEBUG
int m_IsLocked; // Number of times this CS is locked
/** Number of times that this CS is currently locked (levels of recursion). Zero if not locked.
Note that this value should be considered true only when the CS is locked; without the lock, it is UndefinedBehavior to even read it,
but making it std::atomic would impose too much of a runtime penalty.
It is only ever read without the lock in the DeadlockDetect, where the server is terminating anyway. */
int m_RecursionCount;
/** ID of the thread that is currently holding the CS.
Note that this value should be considered true only when the CS is locked; without the lock, it is UndefinedBehavior to even read it,
but making it std::atomic would impose too much of a runtime penalty.
When unlocked, the value stored here has no meaning, it may be an ID of a previous holder, or it could be any garbage.
It is only ever read without the lock in the DeadlockDetect, where the server is terminating anyway. */
std::thread::id m_OwningThreadID;
#endif // _DEBUG
std::recursive_mutex m_Mutex;
} ALIGN_8;

View File

@ -182,7 +182,7 @@ void cRoot::Start(std::unique_ptr<cSettingsRepositoryInterface> a_OverridesRepo)
LoadWorlds(*settingsRepo, IsNewIniFile);
LOGD("Loading plugin manager...");
m_PluginManager = new cPluginManager();
m_PluginManager = new cPluginManager(dd);
m_PluginManager->ReloadPluginsNow(*settingsRepo);
LOGD("Loading MonsterConfig...");
@ -193,7 +193,7 @@ void cRoot::Start(std::unique_ptr<cSettingsRepositoryInterface> a_OverridesRepo)
m_Authenticator.Start(*settingsRepo);
LOGD("Starting worlds...");
StartWorlds();
StartWorlds(dd);
if (settingsRepo->GetValueSetB("DeadlockDetect", "Enabled", true))
{
@ -248,7 +248,7 @@ void cRoot::Start(std::unique_ptr<cSettingsRepositoryInterface> a_OverridesRepo)
dd.Stop();
LOGD("Stopping world threads...");
StopWorlds();
StopWorlds(dd);
LOGD("Stopping authenticator...");
m_Authenticator.Stop();
@ -486,11 +486,11 @@ void cRoot::LoadWorlds(cSettingsRepositoryInterface & a_Settings, bool a_IsNewIn
void cRoot::StartWorlds(void)
void cRoot::StartWorlds(cDeadlockDetect & a_DeadlockDetect)
{
for (WorldMap::iterator itr = m_WorldsByName.begin(); itr != m_WorldsByName.end(); ++itr)
{
itr->second->Start();
itr->second->Start(a_DeadlockDetect);
itr->second->InitializeSpawn();
m_PluginManager->CallHookWorldStarted(*itr->second);
}
@ -500,11 +500,11 @@ void cRoot::StartWorlds(void)
void cRoot::StopWorlds(void)
void cRoot::StopWorlds(cDeadlockDetect & a_DeadlockDetect)
{
for (WorldMap::iterator itr = m_WorldsByName.begin(); itr != m_WorldsByName.end(); ++itr)
{
itr->second->Stop();
itr->second->Stop(a_DeadlockDetect);
}
}

View File

@ -27,6 +27,7 @@ class cPlayer;
class cCommandOutputCallback;
class cCompositeChat;
class cSettingsRepositoryInterface;
class cDeadlockDetect;
typedef cItemCallback<cPlayer> cPlayerListCallback;
typedef cItemCallback<cWorld> cWorldListCallback;
@ -226,10 +227,10 @@ private:
void LoadWorlds(cSettingsRepositoryInterface & a_Settings, bool a_IsNewIniFile);
/** Starts each world's life */
void StartWorlds(void);
void StartWorlds(cDeadlockDetect & a_DeadlockDetect);
/** Stops each world's threads, so that it's safe to unload them */
void StopWorlds(void);
void StopWorlds(cDeadlockDetect & a_DeadlockDetect);
/** Unloads all worlds from memory */
void UnloadWorlds(void);

View File

@ -12,6 +12,7 @@
#include "ChunkMap.h"
#include "Generating/ChunkDesc.h"
#include "SetChunkData.h"
#include "DeadlockDetect.h"
// Serializers
#include "WorldStorage/ScoreboardSerializer.h"
@ -433,8 +434,13 @@ void cWorld::InitializeSpawn(void)
void cWorld::Start(void)
void cWorld::Start(cDeadlockDetect & a_DeadlockDetect)
{
// Track the CSs used by this world in the deadlock detector:
a_DeadlockDetect.TrackCriticalSection(m_CSClients, Printf("World %s clients", m_WorldName.c_str()));
a_DeadlockDetect.TrackCriticalSection(m_CSPlayers, Printf("World %s players", m_WorldName.c_str()));
a_DeadlockDetect.TrackCriticalSection(m_CSTasks, Printf("World %s tasks", m_WorldName.c_str()));
m_SpawnX = 0;
m_SpawnY = cChunkDef::Height;
m_SpawnZ = 0;
@ -603,6 +609,7 @@ void cWorld::Start(void)
SetTimeOfDay(IniFile.GetValueSetI("General", "TimeInTicks", GetTimeOfDay()));
m_ChunkMap = cpp14::make_unique<cChunkMap>(this);
m_ChunkMap->TrackInDeadlockDetect(a_DeadlockDetect, m_WorldName);
// preallocate some memory for ticking blocks so we don't need to allocate that often
m_BlockTickQueue.reserve(1000);
@ -953,7 +960,7 @@ void cWorld::InitialiseAndLoadMobSpawningValues(cIniFile & a_IniFile)
void cWorld::Stop(void)
void cWorld::Stop(cDeadlockDetect & a_DeadlockDetect)
{
// Delete the clients that have been in this world:
{
@ -990,6 +997,11 @@ void cWorld::Stop(void)
m_Generator.Stop();
m_ChunkSender.Stop();
m_Storage.Stop();
a_DeadlockDetect.UntrackCriticalSection(m_CSClients);
a_DeadlockDetect.UntrackCriticalSection(m_CSPlayers);
a_DeadlockDetect.UntrackCriticalSection(m_CSTasks);
m_ChunkMap->UntrackInDeadlockDetect(a_DeadlockDetect);
}

View File

@ -58,7 +58,7 @@ class cCompositeChat;
class cCuboid;
class cSetChunkData;
class cBroadcaster;
class cDeadlockDetect;
typedef std::list< cPlayer * > cPlayerList;
typedef std::list< std::pair< cPlayer *, cWorld * > > cAwaitingPlayerList;
@ -703,11 +703,13 @@ public:
void InitializeSpawn(void);
/** Starts threads that belong to this world */
void Start(void);
/** Starts threads that belong to this world.
a_DeadlockDetect is used for tracking this world's age, detecting a possible deadlock. */
void Start(cDeadlockDetect & a_DeadlockDetect);
/** Stops threads that belong to this world (part of deinit) */
void Stop(void);
/** Stops threads that belong to this world (part of deinit).
a_DeadlockDetect is used for tracking this world's age, detecting a possible deadlock. */
void Stop(cDeadlockDetect & a_DeadlockDetect);
/** Processes the blocks queued for ticking with a delay (m_BlockTickQueue[]) */
void TickQueuedBlocks(void);

View File

@ -13,6 +13,7 @@
#include "BlockEntities/BlockEntity.h"
#include "Blocks/BlockHandler.h"
#include "Generating/ChunkDesc.h"
#include "DeadlockDetect.h"
@ -271,3 +272,19 @@ cBlockEntity * cBlockEntity::CreateByBlockType(BLOCKTYPE a_BlockType, NIBBLETYPE
void cDeadlockDetect::TrackCriticalSection(cCriticalSection & a_CS, const AString & a_Name)
{
}
void cDeadlockDetect::UntrackCriticalSection(cCriticalSection & a_CS)
{
}

View File

@ -13,6 +13,7 @@
#include "BlockEntities/BlockEntity.h"
#include "Blocks/BlockHandler.h"
#include "Generating/ChunkDesc.h"
#include "DeadlockDetect.h"
@ -271,3 +272,19 @@ cBlockEntity * cBlockEntity::CreateByBlockType(BLOCKTYPE a_BlockType, NIBBLETYPE
void cDeadlockDetect::TrackCriticalSection(cCriticalSection & a_CS, const AString & a_Name)
{
}
void cDeadlockDetect::UntrackCriticalSection(cCriticalSection & a_CS)
{
}