Merge remote-tracking branch 'origin/fix-profiler' into game_protocol

This commit is contained in:
hiker 2017-08-09 17:43:14 +10:00
commit 379a9db3c9
6 changed files with 489 additions and 335 deletions

View File

@ -24,6 +24,7 @@
#include "io/file_manager.hpp"
#include "modes/world.hpp"
#include "race/race_manager.hpp"
#include "utils/profiler.hpp"
#include "utils/vs.hpp"
#include <pthread.h>
@ -46,6 +47,17 @@
# endif
#endif
// Define this if the profiler should also collect data of the sfx manager
#undef ENABLE_PROFILING_FOR_SFX_MANAGER
#ifndef ENABLE_PROFILING_FOR_SFX_MANAGER
// Otherwise ignore the profiler push/pop events
// Use undef to remove preprocessor warning
# undef PROFILER_PUSH_CPU_MARKER
# undef PROFILER_POP_CPU_MARKER
# define PROFILER_PUSH_CPU_MARKER(name, r, g, b)
# define PROFILER_POP_CPU_MARKER()
#endif
SFXManager *SFXManager::m_sfx_manager;
// ----------------------------------------------------------------------------
@ -302,6 +314,7 @@ void* SFXManager::mainLoop(void *obj)
while (me->m_sfx_commands.getData().empty() ||
me->m_sfx_commands.getData().front()->m_command!=SFX_EXIT)
{
PROFILER_PUSH_CPU_MARKER("Wait", 255, 0, 0);
bool empty = me->m_sfx_commands.getData().empty();
// Wait in cond_wait for a request to arrive. The 'while' is necessary
@ -321,6 +334,8 @@ void* SFXManager::mainLoop(void *obj)
break;
}
me->m_sfx_commands.unlock();
PROFILER_POP_CPU_MARKER();
PROFILER_PUSH_CPU_MARKER("Execute", 0, 255, 0);
switch (current->m_command)
{
case SFX_PLAY: current->m_sfx->reallyPlayNow(); break;
@ -383,6 +398,8 @@ void* SFXManager::mainLoop(void *obj)
}
delete current;
current = NULL;
PROFILER_POP_CPU_MARKER();
PROFILER_PUSH_CPU_MARKER("yield", 0, 0, 255);
// We access the size without lock, doesn't matter if we
// should get an incorrect value because of concurrent read/writes
if (me->m_sfx_commands.getData().size() == 0)
@ -395,7 +412,7 @@ void* SFXManager::mainLoop(void *obj)
me->queue(SFX_UPDATE, (SFXBase*)NULL, float(t));
}
me->m_sfx_commands.lock();
PROFILER_POP_CPU_MARKER();
} // while
// Signal that the sfx manager can now be deleted.

View File

@ -660,6 +660,10 @@ namespace UserConfigParams
/** True if graphical profiler should be displayed */
PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false );
/** How many seconds worth of data the circular profile buffer
* can store. */
PARAM_PREFIX float m_profiler_buffer_duration PARAM_DEFAULT(20.0f);
// not saved to file
// ---- Networking

View File

@ -51,8 +51,8 @@ static GLuint generateRTT(const core::dimension2du &res, GLint internalFormat, G
RTT::RTT(size_t width, size_t height, float rtt_scale)
{
m_width = width * rtt_scale;
m_height = height * rtt_scale;
m_width = size_t(width * rtt_scale);
m_height = size_t(height * rtt_scale);
m_shadow_FBO = NULL;
m_RH_FBO = NULL;
m_RSM = NULL;

View File

@ -88,7 +88,7 @@ enum DebugMenuCommand
DEBUG_GRAPHICS_BULLET_2,
DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ,
DEBUG_PROFILER,
DEBUG_PROFILER_GENERATE_REPORT,
DEBUG_PROFILER_WRITE_REPORT,
DEBUG_FONT_DUMP_GLYPH_PAGE,
DEBUG_FONT_RELOAD,
DEBUG_FPS,
@ -346,11 +346,10 @@ bool handleContextMenuAction(s32 cmd_id)
irr_driver->toggleBoundingBoxesViz();
break;
case DEBUG_PROFILER:
UserConfigParams::m_profiler_enabled =
!UserConfigParams::m_profiler_enabled;
profiler.toggleStatus();
break;
case DEBUG_PROFILER_GENERATE_REPORT:
profiler.setCaptureReport(!profiler.getCaptureReport());
case DEBUG_PROFILER_WRITE_REPORT:
profiler.writeToFile();
break;
case DEBUG_THROTTLE_FPS:
main_loop->setThrottleFPS(false);
@ -822,8 +821,8 @@ bool onEvent(const SEvent &event)
mnu->addItem(L"Profiler", DEBUG_PROFILER);
if (UserConfigParams::m_profiler_enabled)
mnu->addItem(L"Toggle capture profiler report",
DEBUG_PROFILER_GENERATE_REPORT);
mnu->addItem(L"Save profiler report",
DEBUG_PROFILER_WRITE_REPORT);
mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS);
mnu->addItem(L"Toggle FPS", DEBUG_FPS);
mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY);

View File

@ -16,6 +16,8 @@
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "profiler.hpp"
#include "config/user_config.hpp"
#include "graphics/glwrap.hpp"
#include "graphics/irr_driver.hpp"
#include "graphics/2dutils.hpp"
@ -24,13 +26,14 @@
#include "graphics/irr_driver.hpp"
#include "guiengine/scalable_font.hpp"
#include "io/file_manager.hpp"
#include "utils/string_utils.hpp"
#include "utils/vs.hpp"
#include <assert.h>
#include <stack>
#include <sstream>
#include <algorithm>
#include <fstream>
#include <ostream>
#include <stack>
#include <sstream>
static const char* GPU_Phase[Q_LAST] =
{
@ -107,15 +110,23 @@ Profiler profiler;
//-----------------------------------------------------------------------------
Profiler::Profiler()
{
m_thread_infos.resize(1); // TODO: monothread now, should support multithreading
m_write_id = 0;
m_time_last_sync = getTimeMilliseconds();
m_time_between_sync = 0.0;
m_freeze_state = UNFROZEN;
m_capture_report = false;
m_first_capture_sweep = true;
m_first_gpu_capture_sweep = true;
m_capture_report_buffer = NULL;
m_time_last_sync = getTimeMilliseconds();
m_time_between_sync = 0.0;
m_freeze_state = UNFROZEN;
m_max_frames = int( UserConfigParams::m_profiler_buffer_duration
* UserConfigParams::m_max_fps );
m_current_frame = 0;
m_has_wrapped_around = false;
const int MAX_THREADS = 10;
m_all_threads_data.resize(MAX_THREADS);
m_thread_mapping.resize(MAX_THREADS);
// Add this thread to the thread mapping
m_thread_mapping[0] = pthread_self();
m_threads_used = 1;
m_gpu_times.resize(Q_LAST*m_max_frames);
} // Profile
//-----------------------------------------------------------------------------
@ -124,152 +135,183 @@ Profiler::~Profiler()
} // ~Profiler
//-----------------------------------------------------------------------------
/** Enables or disables capturing data. if capturing is getting disabled, and
* was enabled, it will write out all captured data to two files (general
* and GPU data).
*/
void Profiler::setCaptureReport(bool capture_report)
/** Returns a unique index for a thread. If the calling thread is not yet in
* the mapping, it will assign a new unique id to this thread. This function
* is NOT thread-safe and must be called from a properly protected code
* section. */
int Profiler::getThreadID()
{
if (!m_capture_report && capture_report)
pthread_t thread = pthread_self();
int i = 0;
while(i < m_threads_used)
{
m_capture_report = true;
m_first_capture_sweep = true;
m_first_gpu_capture_sweep = true;
// TODO: a 20 MB hardcoded buffer for now. That should amply suffice for
// all reasonable purposes. But it's not too clean to hardcode
m_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
m_gpu_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
}
else if (m_capture_report && !capture_report)
{
std::string filename
= file_manager->getUserConfigFile(file_manager->getStdoutName());
// when disabling capture to file, flush captured data to a file
if (memcmp( &m_thread_mapping[i],
&thread,
sizeof(thread)) ==0 )
{
std::ofstream filewriter(filename+"-profile.csv",
std::ios::out | std::ios::binary);
const char* str = m_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
}
{
std::ofstream filewriter(filename+"-profile-gpu.csv",
std::ios::out | std::ios::binary);
const char* str = m_gpu_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
return i;
}
i++;
} // for i <m_threads_used
m_capture_report = false;
assert(m_threads_used < (int)m_thread_mapping.size());
m_thread_mapping[m_threads_used] = thread;
m_threads_used++;
delete m_capture_report_buffer;
m_capture_report_buffer = NULL;
delete m_gpu_capture_report_buffer;
m_gpu_capture_report_buffer = NULL;
}
} // setCaptureReport
return m_threads_used - 1;
} // getThreadID
//-----------------------------------------------------------------------------
/// Push a new marker that starts now
void Profiler::pushCpuMarker(const char* name, const video::SColor& color)
void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
{
// Don't do anything when frozen
if (m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
// Don't do anything when disabled or frozen
if (!UserConfigParams::m_profiler_enabled ||
m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
return;
ThreadInfo& ti = getThreadInfo();
MarkerStack& markers_stack = ti.m_markers_stack[m_write_id];
double start = getTimeMilliseconds() - m_time_last_sync;
size_t layer = markers_stack.size();
// We need to look before getting the thread id (since this might
// be a new thread which changes the structure).
m_lock.lock();
int thread_id = getThreadID();
// Add to the stack of current markers
markers_stack.push(Marker(start, -1.0, name, color, layer));
} // pushCpuMarker
ThreadData &td = m_all_threads_data[thread_id];
AllEventData::iterator i = td.m_all_event_data.find(name);
double start = getTimeMilliseconds() - m_time_last_sync;
if (i != td.m_all_event_data.end())
{
i->second.setStart(m_current_frame, start, td.m_event_stack.size());
}
else
{
EventData ed(colour, m_max_frames);
ed.setStart(m_current_frame, start, td.m_event_stack.size());
td.m_all_event_data[name] = ed;
// Ordered headings is used to determine the order in which the
// bar graph is drawn. Outer profiling events will be added first,
// so they will be drawn first, which gives the proper nested
// displayed of events.
td.m_ordered_headings.push_back(name);
}
td.m_event_stack.push_back(name);
m_lock.unlock();
} // pushCPUMarker
//-----------------------------------------------------------------------------
/// Stop the last pushed marker
void Profiler::popCpuMarker()
void Profiler::popCPUMarker()
{
// Don't do anything when frozen
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
// Don't do anything when disabled or frozen
if( !UserConfigParams::m_profiler_enabled ||
m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE )
return;
double now = getTimeMilliseconds();
ThreadInfo& ti = getThreadInfo();
assert(ti.m_markers_stack[m_write_id].size() > 0);
m_lock.lock();
int thread_id = getThreadID();
ThreadData &td = m_all_threads_data[thread_id];
MarkerStack& markers_stack = ti.m_markers_stack[m_write_id];
MarkerList& markers_done = ti.m_markers_done[m_write_id];
// When the profiler gets enabled (which happens in the middle of the
// main loop), there can be some pops without matching pushes (for one
// frame) - ignore those events.
if (td.m_event_stack.size() == 0)
{
m_lock.unlock();
return;
}
// Update the date of end of the marker
Marker& marker = markers_stack.top();
marker.m_end = getTimeMilliseconds() - m_time_last_sync;
assert(td.m_event_stack.size() > 0);
// Remove the marker from the stack and add it to the list of markers done
markers_done.push_front(marker);
markers_stack.pop();
} // popCpuMarker
const std::string &name = td.m_event_stack.back();
td.m_all_event_data[name].setEnd(m_current_frame, now - m_time_last_sync);
td.m_event_stack.pop_back();
m_lock.unlock();
} // popCPUMarker
//-----------------------------------------------------------------------------
/// Swap buffering for the markers
/** Switches the profiler either on or off.
*/
void Profiler::toggleStatus()
{
UserConfigParams::m_profiler_enabled = !UserConfigParams::m_profiler_enabled;
// If the profiler would immediately enabled, calls that have started but
// not finished would not be registered correctly. So set the state to
// waiting, so the unfreeze started at the next sync frame (which is
// outside of the main loop, i.e. all profiling events inside of the main
// loop will work as expected.
if (m_freeze_state == UNFROZEN)
m_freeze_state = WAITING_FOR_UNFREEZE;
} // toggleStatus
//-----------------------------------------------------------------------------
/** Saves all data for the current frame, and starts the next frame in the
* circular buffer. Any events that are currently active (e.g. in a separate
* thread) will be split in two parts: the beginning (till now) in the current
* frame, the rest will be added to the next frame.
*/
void Profiler::synchronizeFrame()
{
// Don't do anything when frozen
if(m_freeze_state == FROZEN)
if(!UserConfigParams::m_profiler_enabled || m_freeze_state == FROZEN)
return;
// Avoid using several times getTimeMilliseconds(), which would yield different results
// Avoid using several times getTimeMilliseconds(),
// which would yield different results
double now = getTimeMilliseconds();
// Swap buffers
int old_write_id = m_write_id;
m_write_id = 1-m_write_id;
// For each thread:
ThreadInfoList::iterator it_end = m_thread_infos.end();
for(ThreadInfoList::iterator it = m_thread_infos.begin() ; it != it_end ; it++)
m_lock.lock();
// Set index to next frame
int next_frame = m_current_frame+1;
if (next_frame >= m_max_frames)
{
// Get the thread information
ThreadInfo& ti = *it;
MarkerList& old_markers_done = ti.m_markers_done[old_write_id];
MarkerStack& old_markers_stack = ti.m_markers_stack[old_write_id];
MarkerList& new_markers_done = ti.m_markers_done[m_write_id];
MarkerStack& new_markers_stack = ti.m_markers_stack[m_write_id];
// Clear the containers for the new frame
new_markers_done.clear();
while(!new_markers_stack.empty())
new_markers_stack.pop();
// Finish the markers in the stack of the previous frame
// and start them for the next frame.
// For each marker in the old stack:
while(!old_markers_stack.empty())
{
// - finish the marker for the previous frame and add it to the old "done" list
Marker& m = old_markers_stack.top();
m.m_end = now - m_time_last_sync;
old_markers_done.push_front(m);
// - start a new one for the new frame
Marker new_marker(0.0, -1.0, m.m_name.c_str(), m.m_color);
new_markers_stack.push(new_marker);
// - next iteration
old_markers_stack.pop();
}
next_frame = 0;
m_has_wrapped_around = true;
}
// First finish all markers that are currently in progress, and add
// a new start marker for the next frame. So e.g. if a thread is busy in
// one event while the main thread syncs the frame, this event will get
// split into two parts in two consecutive frames
for (int i = 0; i < m_threads_used; i++)
{
ThreadData &td = m_all_threads_data[i];
for(unsigned int j=0; j<td.m_event_stack.size(); j++)
{
EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
ed.setEnd(m_current_frame, now-m_time_last_sync);
ed.setStart(next_frame, 0, j);
} // for j in event stack
} // for i in threads
if (m_has_wrapped_around)
{
// The new entries for the circular buffer need to be cleared
// to make sure the new values are not accumulated on top of
// the data from a previous frame.
for (int i = 0; i < m_threads_used; i++)
{
ThreadData &td = m_all_threads_data[i];
AllEventData &aed = td.m_all_event_data;
AllEventData::iterator k;
for (k = aed.begin(); k != aed.end(); ++k)
k->second.getMarker(next_frame).clear();
}
} // is has wrapped around
m_current_frame = next_frame;
// Remember the date of last synchronization
m_time_between_sync = now - m_time_last_sync;
m_time_last_sync = now;
m_time_last_sync = now;
// Freeze/unfreeze as needed
if(m_freeze_state == WAITING_FOR_FREEZE)
m_freeze_state = FROZEN;
else if(m_freeze_state == WAITING_FOR_UNFREEZE)
m_freeze_state = UNFROZEN;
m_lock.unlock();
} // synchronizeFrame
//-----------------------------------------------------------------------------
@ -279,120 +321,105 @@ void Profiler::draw()
#ifndef SERVER_ONLY
PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
video::IVideoDriver* driver = irr_driver->getVideoDriver();
std::stack<Marker> hovered_markers;
// Current frame points to the frame in which currently data is
// being accumulated. Draw the previous (i.e. complete) frame.
m_lock.lock();
int indx = m_current_frame - 1;
if (indx < 0) indx = m_max_frames - 1;
m_lock.unlock();
drawBackground();
// Force to show the pointer
irr_driver->showPointer();
int read_id = 1-m_write_id;
// Compute some values for drawing (unit: pixels, but we keep floats
// for reducing errors accumulation)
core::dimension2d<u32> screen_size = driver->getScreenSize();
core::dimension2d<u32> screen_size = driver->getScreenSize();
const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
const double x_offset = MARGIN_X*screen_size.Width;
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
const double line_height = LINE_HEIGHT*screen_size.Height;
size_t nb_thread_infos = m_thread_infos.size();
const double x_offset = MARGIN_X*screen_size.Width;
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
const double line_height = LINE_HEIGHT*screen_size.Height;
double start = -1.0f;
double end = -1.0f;
for (size_t i = 0; i < nb_thread_infos; i++)
// Compute start end end time for this frame
double start = 99999.0f;
double end = -1.0f;
// Use this thread (thread 0) to compute start and end time. All other
// threads might have 'unfinished' events, or multiple identical events
// in this frame (i.e. start time would be incorrect(.
AllEventData &aed = m_all_threads_data[0].m_all_event_data;
AllEventData::iterator j;
for (j = aed.begin(); j != aed.end(); ++j)
{
MarkerList& markers = m_thread_infos[i].m_markers_done[read_id];
const Marker &marker = j->second.getMarker(indx);
start = std::min(start, marker.getStart());
end = std::max(end, marker.getEnd());
} // for j in events
MarkerList::const_iterator it_end = markers.end();
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
{
const Marker& m = *it;
if (start < 0.0) start = m.m_start;
else start = std::min(start, m.m_start);
if (end < 0.0) end = m.m_end;
else end = std::max(end, m.m_end);
}
}
const double duration = end - start;
const double factor = profiler_width / duration;
// Get the mouse pos
core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
// For each thread:
for (size_t i = 0; i < nb_thread_infos; i++)
std::stack<AllEventData::iterator> hovered_markers;
for (int i = 0; i < m_threads_used; i++)
{
// Draw all markers
MarkerList& markers = m_thread_infos[i].m_markers_done[read_id];
ThreadData &td = m_all_threads_data[i];
AllEventData &aed = td.m_all_event_data;
if (markers.empty())
continue;
if (m_capture_report)
// Thread 1 has 'proper' start and end events (assuming that each
// event is at most called once). But all other threads might have
// multiple start and end events, so the recorder start time is only
// of the last event and so can not be used to draw the bar graph
double start_xpos = 0;
for(int k=0; k<(int)td.m_ordered_headings.size(); k++)
{
if (m_first_capture_sweep)
m_capture_report_buffer->getStdStream() << "\"Thread(1)\" ";
else
m_capture_report_buffer->getStdStream() << i << " ";
}
MarkerList::const_iterator it_end = markers.end();
int count = 2;
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
{
const Marker& m = *it;
assert(m.m_end >= 0.0);
if (m_capture_report)
{
if (m_first_capture_sweep)
m_capture_report_buffer->getStdStream() << "\"" << m.m_name
<< "(" << count++ << ")\" ";
else
m_capture_report_buffer->getStdStream()
<< (int)round((m.m_end - m.m_start) * 1000) << " ";
}
core::rect<s32> pos((s32)( x_offset + factor*m.m_start ),
(s32)( y_offset + i*line_height ),
(s32)( x_offset + factor*m.m_end ),
(s32)( y_offset + (i+1)*line_height ));
AllEventData::iterator j = aed.find(td.m_ordered_headings[k]);
const Marker &marker = j->second.getMarker(indx);
if (i == 0)
start_xpos = factor*marker.getStart();
core::rect<s32> pos((s32)(x_offset + start_xpos),
(s32)(y_offset + i*line_height),
(s32)(x_offset + start_xpos
+ factor*marker.getDuration()),
(s32)(y_offset + (i + 1)*line_height) );
if (i != 0)
start_xpos += factor*marker.getDuration();
// Reduce vertically the size of the markers according to their layer
pos.UpperLeftCorner.Y += m.m_layer*2;
pos.LowerRightCorner.Y -= m.m_layer*2;
GL32_draw2DRectangle(m.m_color, pos);
pos.UpperLeftCorner.Y += 2 * marker.getLayer();
pos.LowerRightCorner.Y -= 2 * marker.getLayer();
GL32_draw2DRectangle(j->second.getColour(), pos);
// If the mouse cursor is over the marker, get its information
if(pos.isPointInside(mouse_pos))
hovered_markers.push(m);
}
if (pos.isPointInside(mouse_pos))
{
hovered_markers.push(j);
}
} // for j in AllEventdata
} // for i in threads
if (m_capture_report)
{
m_capture_report_buffer->getStdStream() << "\n";
m_first_capture_sweep = false;
}
}
// GPU profiler
QueryPerf hovered_gpu_marker = Q_LAST;
long hovered_gpu_marker_elapsed = 0;
int gpu_y = int(y_offset + nb_thread_infos*line_height + line_height/2);
int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
float total = 0;
unsigned int gpu_timers[Q_LAST];
for (unsigned i = 0; i < Q_LAST; i++)
{
#ifndef SERVER_ONLY
gpu_timers[i] = irr_driver->getGPUTimer(i).elapsedTimeus();
int n = irr_driver->getGPUTimer(i).elapsedTimeus();
m_gpu_times[indx*Q_LAST + i] = n;
total += n;
#endif
total += gpu_timers[i];
}
static video::SColor colors[] = {
video::SColor(255, 255, 0, 0),
video::SColor(255, 0, 255, 0),
@ -410,7 +437,7 @@ void Profiler::draw()
//Log::info("GPU Perf", "Phase %d : %d us\n", i,
// irr_driver->getGPUTimer(i).elapsedTimeus());
float elapsed = float(gpu_timers[i]);
float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
(s32)(y_offset + gpu_y),
(s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
@ -422,30 +449,18 @@ void Profiler::draw()
if (pos.isPointInside(mouse_pos))
{
hovered_gpu_marker = (QueryPerf)i;
hovered_gpu_marker_elapsed = gpu_timers[i];
hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
}
if (m_capture_report)
{
if (m_first_gpu_capture_sweep)
m_gpu_capture_report_buffer->getStdStream() << GPU_Phase[i] << ";";
else
m_gpu_capture_report_buffer->getStdStream() << elapsed << ";";
}
}
if (m_capture_report)
{
m_gpu_capture_report_buffer->getStdStream() << "\n";
m_first_gpu_capture_sweep = false;
}
}
// Draw the end of the frame
{
s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
s32 y_down_sync = (s32)( (MARGIN_Y + (2+nb_thread_infos)*LINE_HEIGHT)
s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
* screen_size.Height );
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
@ -460,12 +475,13 @@ void Profiler::draw()
core::stringw text;
while(!hovered_markers.empty())
{
Marker& m = hovered_markers.top();
AllEventData::iterator j = hovered_markers.top();
const Marker &marker = j->second.getMarker(indx);
std::ostringstream oss;
oss.precision(4);
oss << m.m_name << " [" << (m.m_end - m.m_start) << " ms / ";
oss << j->first << " [" << (marker.getDuration()) << " ms / ";
oss.precision(3);
oss << (m.m_end - m.m_start)*100.0 / duration << "%]" << std::endl;
oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
text += oss.str().c_str();
hovered_markers.pop();
}
@ -481,12 +497,6 @@ void Profiler::draw()
}
}
if (m_capture_report)
{
font->draw("Capturing profiler report...", MARKERS_NAMES_POS,
video::SColor(0xFF, 0x00, 0x90, 0x00));
}
PROFILER_POP_CPU_MARKER();
#endif
} // draw
@ -545,3 +555,61 @@ void Profiler::drawBackground()
#endif
} // drawBackground
//-----------------------------------------------------------------------------
/** Saves the collected profile data to a file. Filename is based on the
* stdout name (with -profile appended).
*/
void Profiler::writeToFile()
{
m_lock.lock();
std::string base_name =
file_manager->getUserConfigFile(file_manager->getStdoutName());
// First CPU data
for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
{
std::ofstream f(base_name + ".profile-cpu-" +
StringUtils::toString(thread_id) );
ThreadData &td = m_all_threads_data[thread_id];
f << "# ";
for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
f << "\"" << td.m_ordered_headings[i] << "(" << i+1 <<")\" ";
f << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++)
{
const EventData &ed = td.m_all_event_data[td.m_ordered_headings[i]];
f << int(ed.getMarker(start).getDuration()*1000) << " ";
} // for i i new_headings
f << std::endl;
start = (start + 1) % m_max_frames;
} // while start != m_current_frame
f.close();
} // for all thread_ids
std::ofstream f_gpu(base_name + ".profile-gpu");
f_gpu << "# ";
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << "\"" << GPU_Phase[i] << "(" << i+1 << ")\" ";
} // for i < Q_LAST
f_gpu << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << m_gpu_times[start*Q_LAST + i] << " ";
} // for i < Q_LAST
f_gpu << std::endl;
start = (start + 1) % m_max_frames;
}
f_gpu.close();
m_lock.unlock();
} // writeFile

View File

@ -18,15 +18,20 @@
#ifndef PROFILER_HPP
#define PROFILER_HPP
#include <irrlicht.h>
#include <list>
#include <vector>
#include <stack>
#include <string>
#include <streambuf>
#include <ostream>
#include <iostream>
#include "utils/synchronised.hpp"
#include <irrlicht.h>
#include <pthread.h>
#include <assert.h>
#include <iostream>
#include <list>
#include <map>
#include <ostream>
#include <stack>
#include <streambuf>
#include <string>
#include <vector>
enum QueryPerf
{
@ -70,10 +75,10 @@ double getTimeMilliseconds();
#ifdef ENABLE_PROFILER
#define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \
profiler.pushCpuMarker(name, video::SColor(0xFF, r, g, b))
profiler.pushCPUMarker(name, video::SColor(0xFF, r, g, b))
#define PROFILER_POP_CPU_MARKER() \
profiler.popCpuMarker()
profiler.popCPUMarker()
#define PROFILER_SYNC_FRAME() \
profiler.synchronizeFrame()
@ -90,105 +95,176 @@ double getTimeMilliseconds();
using namespace irr;
// ============================================================================
/** For profiling reports, we need a custom strijng stream that writes to a
* large pre-allocated buffer, to avoid allocating as much as possible during
* profiling. */
template <typename char_type>
struct ostreambuf : public std::basic_streambuf<char_type,
std::char_traits<char_type> >
{
ostreambuf(char_type* buffer, std::streamsize bufferLength)
{
// set the "put" pointer the start of the buffer and record it's length.
this->setp(buffer, buffer + bufferLength);
}
}; // basic_streambuf
// ============================================================================
class StringBuffer
{
private:
char* m_buffer;
ostreambuf<char> ostreamBuffer;
std::ostream messageStream;
public:
StringBuffer(unsigned int size) : m_buffer((char*)calloc(size, 1)),
ostreamBuffer(m_buffer, size),
messageStream(&ostreamBuffer)
{
} // StringBuffer
// ------------------------------------------------------------------------
~StringBuffer()
{
free(m_buffer);
}
// ------------------------------------------------------------------------
std::ostream& getStdStream() { return messageStream; }
// ------------------------------------------------------------------------
char* getRawBuffer() { return m_buffer; }
}; // class StringBuffer
// ============================================================================
/**
* \brief class that allows run-time graphical profiling through the use of markers
/** \brief class that allows run-time graphical profiling through the use
* of markers.
* \ingroup utils
*/
class Profiler
{
private:
// ------------------------------------------------------------------------
struct Marker
class Marker
{
/** Start time of marker (relative to time of last synch). */
private:
/** An event that is started (pushed) stores the start time in this
* variable. */
double m_start;
/** End time of marker (relative to time of last synch). */
double m_end;
/** Duration of the event in this frame (accumulated if this event
* should be recorded more than once). */
double m_duration;
/** Distance of marker from root (for nested events), used to
* adjust vertical height when drawing. */
size_t m_layer;
public:
// --------------------------------------------------------------------
Marker() { m_start = 0; m_duration = 0; m_layer = 0; }
/** Name of event. */
std::string m_name;
/** Color for this event. */
video::SColor m_color;
Marker(double start, double end, const char* name="N/A",
const video::SColor& color=video::SColor(), size_t layer=0)
: m_start(start), m_end(end), m_layer(layer), m_name(name)
, m_color(color)
// --------------------------------------------------------------------
Marker(double start, size_t layer=0)
: m_start(start), m_duration(0), m_layer(layer)
{
}
// --------------------------------------------------------------------
Marker(const Marker& ref)
: m_start(ref.m_start), m_end(ref.m_end), m_layer(ref.m_layer)
, m_name(ref.m_name), m_color(ref.m_color)
: m_start(ref.m_start), m_duration(ref.m_duration),
m_layer(ref.m_layer)
{
}
}; // struct Marker
// ------------------------------------------------------------------------
// --------------------------------------------------------------------
/** Returns the start time of this event marker. */
double getStart() const { return m_start; }
// --------------------------------------------------------------------
/** Returns the end time of this event marker. */
double getEnd() const { return m_start+m_duration; }
// --------------------------------------------------------------------
/** Returns the duration of this event. */
double getDuration() const { return m_duration; }
// --------------------------------------------------------------------
size_t getLayer() const { return m_layer; }
// --------------------------------------------------------------------
/** Called when an entry in the cyclic buffer is reused. Makes sure
* that time for a new event can be accumulated. */
void clear() { m_duration = 0; }
// --------------------------------------------------------------------
/** Sets start time and layer for this event. */
void setStart(double start, size_t layer = 0)
{
m_start = start; m_layer = layer;
} // setStart
// --------------------------------------------------------------------
/** Sets the end time of this event. */
void setEnd(double end)
{
m_duration += (end - m_start);
} // setEnd
typedef std::list<Marker> MarkerList;
typedef std::stack<Marker> MarkerStack;
}; // class Marker
struct ThreadInfo
// ========================================================================
/** The data for one event. It contains the events colours, all markers
* for the buffer period and a stack to detect nesting of markers.
*/
class EventData
{
MarkerList m_markers_done[2];
MarkerStack m_markers_stack[2];
};
// ------------------------------------------------------------------------
private:
/** Colour to use in the on-screen display */
video::SColor m_colour;
typedef std::vector<ThreadInfo> ThreadInfoList;
/** Vector of all buffered markers. */
std::vector<Marker> m_all_markers;
ThreadInfoList m_thread_infos;
public:
EventData() {}
EventData(video::SColor colour, int max_size)
{
m_all_markers.resize(max_size);
m_colour = colour;
} // EventData
// --------------------------------------------------------------------
/** Records the start of an event for a given frame. */
void setStart(size_t frame, double start, int layer)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setStart(start, layer);
} // setStart
// --------------------------------------------------------------------
/** Records the end of an event for a given frame. */
void setEnd(size_t frame, double end)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setEnd(end);
} // setEnd
// --------------------------------------------------------------------
const Marker& getMarker(int n) const { return m_all_markers[n]; }
Marker& getMarker(int n) { return m_all_markers[n]; }
// --------------------------------------------------------------------
/** Returns the colour for this event. */
video::SColor getColour() const { return m_colour; }
// --------------------------------------------------------------------
}; // EventData
/** Which of the two buffers to store data in. */
int m_write_id;
double m_time_last_sync;
double m_time_between_sync;
// ========================================================================
/** The mapping of event names to the corresponding EventData. */
typedef std::map<std::string, EventData> AllEventData;
// ========================================================================
struct ThreadData
{
/** Stack of events to detect nesting. */
std::vector< std::string > m_event_stack;
/** This stores the event names in the order in which they occur.
* This means that 'outer' events occur here before any child
* events. This list is then used to determine the order in which the
* bar graphs are drawn, which results in the proper nesting of events.*/
std::vector<std::string> m_ordered_headings;
AllEventData m_all_event_data;
}; // class ThreadData
// ========================================================================
/** Data structure containing all currently buffered markers. The index
* is the thread id. */
std::vector< ThreadData> m_all_threads_data;
/** A mapping of thread_t pointers to a unique integer (starting from 0).*/
std::vector<pthread_t> m_thread_mapping;
/** Buffer for the GPU times (in ms). */
std::vector<int> m_gpu_times;
/** Counts the threads used, i.e. registered in m_thread_mapping. */
int m_threads_used;
/** Index of the current frame in the buffer. */
int m_current_frame;
/** We don't need the bool, but easiest way to get a lock for the whole
* instance (since we need to avoid that a synch is done which changes
* the current frame while another threaded uses this variable, or
* while a new thread is added. */
Synchronised<bool> m_lock;
/** True if the circular buffer has wrapped around. */
bool m_has_wrapped_around;
/** The maximum number of frames to be buffered. Used to minimise
* reallocations. */
int m_max_frames;
/** Time of last sync. All start/end times are stored relative
* to this time. */
double m_time_last_sync;
/** Time between now and last sync, used to scale the GUI bar. */
double m_time_between_sync;
/** List of all event names. This list is sorted to make sure
* if the circular buffer is dumped more than once the order
* of events remains the same. */
std::vector<std::string> m_all_event_names;
// Handling freeze/unfreeze by clicking on the display
enum FreezeState
@ -201,33 +277,23 @@ private:
FreezeState m_freeze_state;
bool m_capture_report;
bool m_first_capture_sweep;
bool m_first_gpu_capture_sweep;
StringBuffer* m_capture_report_buffer;
StringBuffer* m_gpu_capture_report_buffer;
protected:
// TODO: detect on which thread this is called to support multithreading
ThreadInfo& getThreadInfo() { return m_thread_infos[0]; }
void drawBackground();
private:
int getThreadID();
void drawBackground();
public:
Profiler();
Profiler();
virtual ~Profiler();
void pushCpuMarker(const char* name="N/A",
const video::SColor& color=video::SColor());
void popCpuMarker();
void synchronizeFrame();
void draw();
void onClick(const core::vector2di& mouse_pos);
void pushCPUMarker(const char* name="N/A",
const video::SColor& color=video::SColor());
void popCPUMarker();
void toggleStatus();
void synchronizeFrame();
void draw();
void onClick(const core::vector2di& mouse_pos);
void writeToFile();
// ------------------------------------------------------------------------
bool getCaptureReport() const { return m_capture_report; }
// ------------------------------------------------------------------------
void setCaptureReport(bool captureReport);
// ------------------------------------------------------------------------
bool isFrozen() const { return m_freeze_state == FROZEN; }