Very first version of profiler that supports multiple threads

and handles events in branches better.
This commit is contained in:
hiker 2017-07-31 22:56:47 +10:00
parent 47d2e30c5f
commit ea12c8f494
4 changed files with 423 additions and 319 deletions

View File

@ -660,6 +660,10 @@ namespace UserConfigParams
/** True if graphical profiler should be displayed */ /** True if graphical profiler should be displayed */
PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false ); PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false );
/** How many seconds worth of data the circular profile buffer
* can store. */
PARAM_PREFIX float m_profiler_buffer_duration PARAM_DEFAULT(20.0f);
// not saved to file // not saved to file
// ---- Networking // ---- Networking

View File

@ -88,7 +88,7 @@ enum DebugMenuCommand
DEBUG_GRAPHICS_BULLET_2, DEBUG_GRAPHICS_BULLET_2,
DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ, DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ,
DEBUG_PROFILER, DEBUG_PROFILER,
DEBUG_PROFILER_GENERATE_REPORT, DEBUG_PROFILER_WRITE_REPORT,
DEBUG_FONT_DUMP_GLYPH_PAGE, DEBUG_FONT_DUMP_GLYPH_PAGE,
DEBUG_FONT_RELOAD, DEBUG_FONT_RELOAD,
DEBUG_FPS, DEBUG_FPS,
@ -349,8 +349,8 @@ bool handleContextMenuAction(s32 cmd_id)
UserConfigParams::m_profiler_enabled = UserConfigParams::m_profiler_enabled =
!UserConfigParams::m_profiler_enabled; !UserConfigParams::m_profiler_enabled;
break; break;
case DEBUG_PROFILER_GENERATE_REPORT: case DEBUG_PROFILER_WRITE_REPORT:
profiler.setCaptureReport(!profiler.getCaptureReport()); profiler.writeToFile();
break; break;
case DEBUG_THROTTLE_FPS: case DEBUG_THROTTLE_FPS:
main_loop->setThrottleFPS(false); main_loop->setThrottleFPS(false);
@ -822,8 +822,8 @@ bool onEvent(const SEvent &event)
mnu->addItem(L"Profiler", DEBUG_PROFILER); mnu->addItem(L"Profiler", DEBUG_PROFILER);
if (UserConfigParams::m_profiler_enabled) if (UserConfigParams::m_profiler_enabled)
mnu->addItem(L"Toggle capture profiler report", mnu->addItem(L"Save profiler report",
DEBUG_PROFILER_GENERATE_REPORT); DEBUG_PROFILER_WRITE_REPORT);
mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS); mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS);
mnu->addItem(L"Toggle FPS", DEBUG_FPS); mnu->addItem(L"Toggle FPS", DEBUG_FPS);
mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY); mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY);

View File

@ -16,6 +16,8 @@
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "profiler.hpp" #include "profiler.hpp"
#include "config/user_config.hpp"
#include "graphics/glwrap.hpp" #include "graphics/glwrap.hpp"
#include "graphics/irr_driver.hpp" #include "graphics/irr_driver.hpp"
#include "graphics/2dutils.hpp" #include "graphics/2dutils.hpp"
@ -26,11 +28,11 @@
#include "io/file_manager.hpp" #include "io/file_manager.hpp"
#include "utils/vs.hpp" #include "utils/vs.hpp"
#include <assert.h>
#include <stack>
#include <sstream>
#include <algorithm> #include <algorithm>
#include <fstream> #include <fstream>
#include <ostream>
#include <stack>
#include <sstream>
static const char* GPU_Phase[Q_LAST] = static const char* GPU_Phase[Q_LAST] =
{ {
@ -74,7 +76,8 @@ Profiler profiler;
#define MARKERS_NAMES_POS core::rect<s32>(50,100,150,200) #define MARKERS_NAMES_POS core::rect<s32>(50,100,150,200)
#define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,250) #define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,250)
#define TIME_DRAWN_MS 30.0f // the width of the profiler corresponds to TIME_DRAWN_MS milliseconds // The width of the profiler corresponds to TIME_DRAWN_MS milliseconds
#define TIME_DRAWN_MS 30.0f
// --- Begin portable precise timer --- // --- Begin portable precise timer ---
#ifdef WIN32 #ifdef WIN32
@ -90,7 +93,7 @@ Profiler profiler;
LARGE_INTEGER timer; LARGE_INTEGER timer;
QueryPerformanceCounter(&timer); QueryPerformanceCounter(&timer);
return double(timer.QuadPart) / perFreq; return double(timer.QuadPart) / perFreq;
} } // getTimeMilliseconds
#else #else
#include <sys/time.h> #include <sys/time.h>
@ -99,160 +102,152 @@ Profiler profiler;
struct timeval tv; struct timeval tv;
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0); return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0);
} } // getTimeMilliseconds
#endif #endif
// --- End portable precise timer --- // --- End portable precise timer ---
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
Profiler::Profiler() Profiler::Profiler()
{ {
m_thread_infos.resize(1); // TODO: monothread now, should support multithreading
m_write_id = 0;
m_time_last_sync = getTimeMilliseconds(); m_time_last_sync = getTimeMilliseconds();
m_time_between_sync = 0.0; m_time_between_sync = 0.0;
m_freeze_state = UNFROZEN; m_freeze_state = UNFROZEN;
m_capture_report = false;
m_first_capture_sweep = true; // By limiting the number of threads that can be created, we avoid the
m_first_gpu_capture_sweep = true; // problem that all access to m_all_event_data need to be locked
m_capture_report_buffer = NULL; // (otherwise adding a thread to m_all_event_data can trigger a
} // reallocate, which makes concurrent access invalid)
m_max_frames = int( UserConfigParams::m_profiler_buffer_duration
* UserConfigParams::m_max_fps );
m_current_frame = 0;
m_has_wrapped_around = false;
m_threads_used = 0;
const int MAX_THREADS = 10;
m_all_threads_data.resize(MAX_THREADS);
m_thread_mapping.getData().resize(MAX_THREADS);
m_gpu_times.resize(Q_LAST*m_max_frames);
} // Profile
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
Profiler::~Profiler() Profiler::~Profiler()
{ {
} } // ~Profiler
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
int Profiler::getThreadID()
{
m_thread_mapping.lock();
pthread_t thread = pthread_self();
int i = 0;
while(i < m_threads_used)
{
if (memcmp( &m_thread_mapping.getData()[i],
&thread,
sizeof(thread)) ==0 )
{
m_thread_mapping.unlock();
return i;
}
i++;
} // for i <m_threads_used
void Profiler::setCaptureReport(bool captureReport) assert(m_threads_used < (int)m_thread_mapping.getData().size());
{ m_thread_mapping.getData()[m_threads_used] = thread;
if (!m_capture_report && captureReport) m_threads_used++;
{ m_thread_mapping.unlock();
m_capture_report = true;
m_first_capture_sweep = true;
m_first_gpu_capture_sweep = true;
// TODO: a 20 MB hardcoded buffer for now. That should amply suffice for
// all reasonable purposes. But it's not too clean to hardcode
m_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
m_gpu_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
}
else if (m_capture_report && !captureReport)
{
// when disabling capture to file, flush captured data to a file
{
std::ofstream filewriter(file_manager->getUserConfigFile("profiling.csv").c_str(), std::ios::out | std::ios::binary);
const char* str = m_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
}
{
std::ofstream filewriter(file_manager->getUserConfigFile("profiling_gpu.csv").c_str(), std::ios::out | std::ios::binary);
const char* str = m_gpu_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
}
m_capture_report = false; return m_threads_used - 1;
} // getThreadID
delete m_capture_report_buffer;
m_capture_report_buffer = NULL;
delete m_gpu_capture_report_buffer;
m_gpu_capture_report_buffer = NULL;
}
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Push a new marker that starts now /// Push a new marker that starts now
void Profiler::pushCpuMarker(const char* name, const video::SColor& color) void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
{ {
// Don't do anything when frozen // Don't do anything when frozen
if (m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE) if (m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
return; return;
ThreadInfo& ti = getThreadInfo();
MarkerStack& markers_stack = ti.markers_stack[m_write_id];
double start = getTimeMilliseconds() - m_time_last_sync; double start = getTimeMilliseconds() - m_time_last_sync;
size_t layer = markers_stack.size(); int thread_id = getThreadID();
// Add to the stack of current markers ThreadData &td = m_all_threads_data[thread_id];
markers_stack.push(Marker(start, -1.0, name, color, layer)); AllEventData::iterator i = td.m_all_event_data.find(name);
if (i != td.m_all_event_data.end())
{
i->second.setStart(m_current_frame, start, td.m_event_stack.size());
} }
else
{
EventData ed(colour, m_max_frames);
ed.setStart(m_current_frame, start, td.m_event_stack.size());
td.m_all_event_data[name] = ed;
}
td.m_event_stack.push_back(name);
} // pushCPUMarker
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Stop the last pushed marker /// Stop the last pushed marker
void Profiler::popCpuMarker() void Profiler::popCPUMarker()
{ {
// Don't do anything when frozen // Don't do anything when frozen
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE) if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
return; return;
ThreadInfo& ti = getThreadInfo(); int thread_id = getThreadID();
assert(ti.markers_stack[m_write_id].size() > 0); ThreadData &td = m_all_threads_data[thread_id];
MarkerStack& markers_stack = ti.markers_stack[m_write_id]; assert(td.m_event_stack.size() > 0);
MarkerList& markers_done = ti.markers_done[m_write_id];
// Update the date of end of the marker const std::string &name = td.m_event_stack.back();
Marker& marker = markers_stack.top(); td.m_all_event_data[name].setEnd(m_current_frame,
marker.end = getTimeMilliseconds() - m_time_last_sync; getTimeMilliseconds() - m_time_last_sync);
// Remove the marker from the stack and add it to the list of markers done td.m_event_stack.pop_back();
markers_done.push_front(marker); } // popCPUMarker
markers_stack.pop();
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Swap buffering for the markers /** Saves all data for the current frame, and starts the next frame in the
* circular buffer. Any events that are currently active (e.g. in a separate
* thread) will be split in two parts: the beginning (till now) in the current
* frame, the rest will be added to the next frame.
*/
void Profiler::synchronizeFrame() void Profiler::synchronizeFrame()
{ {
// Don't do anything when frozen // Don't do anything when frozen
if(m_freeze_state == FROZEN) if(m_freeze_state == FROZEN)
return; return;
// Avoid using several times getTimeMilliseconds(), which would yield different results // Avoid using several times getTimeMilliseconds(),
// which would yield different results
double now = getTimeMilliseconds(); double now = getTimeMilliseconds();
// Swap buffers // Set index to next frame
int old_write_id = m_write_id; int next_frame = m_current_frame+1;
m_write_id = !m_write_id; if (next_frame >= m_max_frames)
// For each thread:
ThreadInfoList::iterator it_end = m_thread_infos.end();
for(ThreadInfoList::iterator it = m_thread_infos.begin() ; it != it_end ; it++)
{ {
// Get the thread information next_frame = 0;
ThreadInfo& ti = *it; m_has_wrapped_around = true;
}
MarkerList& old_markers_done = ti.markers_done[old_write_id]; // First finish all markers that are currently in progress, and add
MarkerStack& old_markers_stack = ti.markers_stack[old_write_id]; // a new start marker for the next frame. So e.g. if a thread is busy in
// one event while the main thread syncs the frame, this event will get
MarkerList& new_markers_done = ti.markers_done[m_write_id]; // split into two parts in two consecutive frames
MarkerStack& new_markers_stack = ti.markers_stack[m_write_id]; for (int i = 0; i < m_threads_used; i++)
// Clear the containers for the new frame
new_markers_done.clear();
while(!new_markers_stack.empty())
new_markers_stack.pop();
// Finish the markers in the stack of the previous frame
// and start them for the next frame.
// For each marker in the old stack:
while(!old_markers_stack.empty())
{ {
// - finish the marker for the previous frame and add it to the old "done" list ThreadData &td = m_all_threads_data[i];
Marker& m = old_markers_stack.top(); for(unsigned int j=0; j<td.m_event_stack.size(); j++)
m.end = now - m_time_last_sync; {
old_markers_done.push_front(m); EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
ed.setEnd(m_current_frame, now);
// - start a new one for the new frame ed.setStart(next_frame, now, j);
Marker new_marker(0.0, -1.0, m.name.c_str(), m.color);
new_markers_stack.push(new_marker);
// - next iteration
old_markers_stack.pop();
} }
} }
m_current_frame = next_frame;
// Remember the date of last synchronization // Remember the date of last synchronization
m_time_between_sync = now - m_time_last_sync; m_time_between_sync = now - m_time_last_sync;
m_time_last_sync = now; m_time_last_sync = now;
@ -262,7 +257,7 @@ void Profiler::synchronizeFrame()
m_freeze_state = FROZEN; m_freeze_state = FROZEN;
else if(m_freeze_state == WAITING_FOR_UNFREEZE) else if(m_freeze_state == WAITING_FOR_UNFREEZE)
m_freeze_state = UNFROZEN; m_freeze_state = UNFROZEN;
} } // synchronizeFrame
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Draw the markers /// Draw the markers
@ -271,43 +266,41 @@ void Profiler::draw()
#ifndef SERVER_ONLY #ifndef SERVER_ONLY
PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00); PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
video::IVideoDriver* driver = irr_driver->getVideoDriver(); video::IVideoDriver* driver = irr_driver->getVideoDriver();
std::stack<Marker> hovered_markers;
// Current frame points to the frame in which currently data is
// being accumulated. Draw the previous (i.e. complete) frame.
int indx = m_current_frame - 1;
if (indx < 0) indx = m_max_frames - 1;
drawBackground(); drawBackground();
// Force to show the pointer // Force to show the pointer
irr_driver->showPointer(); irr_driver->showPointer();
int read_id = !m_write_id; // Compute some values for drawing (unit: pixels, but we keep floats
// for reducing errors accumulation)
// Compute some values for drawing (unit: pixels, but we keep floats for reducing errors accumulation)
core::dimension2d<u32> screen_size = driver->getScreenSize(); core::dimension2d<u32> screen_size = driver->getScreenSize();
const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width; const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
const double x_offset = MARGIN_X*screen_size.Width; const double x_offset = MARGIN_X*screen_size.Width;
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height; const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
const double line_height = LINE_HEIGHT*screen_size.Height; const double line_height = LINE_HEIGHT*screen_size.Height;
size_t nb_thread_infos = m_thread_infos.size();
// Compute start end end time for this frame
double start = -1.0f; double start = 99999.0f;
double end = -1.0f; double end = -1.0f;
for (size_t i = 0; i < nb_thread_infos; i++) for (int i = 0; i < m_threads_used; i++)
{ {
MarkerList& markers = m_thread_infos[i].markers_done[read_id]; AllEventData &aed = m_all_threads_data[i].m_all_event_data;
AllEventData::iterator j;
MarkerList::const_iterator it_end = markers.end(); for (j = aed.begin(); j != aed.end(); ++j)
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
{ {
const Marker& m = *it; const Marker &marker = j->second.getMarker(indx);
start = std::min(start, marker.getStart() );
end = std::max(end, marker.getEnd() );
} // for j in events
} // for i in threads
if (start < 0.0) start = m.start;
else start = std::min(start, m.start);
if (end < 0.0) end = m.end;
else end = std::max(end, m.end);
}
}
const double duration = end - start; const double duration = end - start;
const double factor = profiler_width / duration; const double factor = profiler_width / duration;
@ -315,70 +308,47 @@ void Profiler::draw()
// Get the mouse pos // Get the mouse pos
core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos(); core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
// For each thread: std::stack<AllEventData::iterator> hovered_markers;
for (size_t i = 0; i < nb_thread_infos; i++) for (int i = 0; i < m_threads_used; i++)
{ {
// Draw all markers ThreadData &td = m_all_threads_data[i];
MarkerList& markers = m_thread_infos[i].markers_done[read_id]; AllEventData &aed = td.m_all_event_data;
AllEventData::iterator j;
if (markers.empty()) for (j = aed.begin(); j != aed.end(); ++j)
continue;
if (m_capture_report)
{ {
if (m_first_capture_sweep) const Marker &marker = j->second.getMarker(indx);
m_capture_report_buffer->getStdStream() << "\"Thread\";"; core::rect<s32> pos((s32)(x_offset + factor*marker.getStart()),
else
m_capture_report_buffer->getStdStream() << i << ";";
}
MarkerList::const_iterator it_end = markers.end();
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
{
const Marker& m = *it;
assert(m.end >= 0.0);
if (m_capture_report)
{
if (m_first_capture_sweep)
m_capture_report_buffer->getStdStream() << "\"" << m.name << "\";";
else
m_capture_report_buffer->getStdStream() << (int)round((m.end - m.start) * 1000) << ";";
}
core::rect<s32> pos((s32)( x_offset + factor*m.start ),
(s32)(y_offset + i*line_height), (s32)(y_offset + i*line_height),
(s32)( x_offset + factor*m.end ), (s32)(x_offset + factor*marker.getEnd()),
(s32)(y_offset + (i + 1)*line_height) ); (s32)(y_offset + (i + 1)*line_height) );
// Reduce vertically the size of the markers according to their layer // Reduce vertically the size of the markers according to their layer
pos.UpperLeftCorner.Y += m.layer*2; pos.UpperLeftCorner.Y += 2 * marker.getLayer();
pos.LowerRightCorner.Y -= m.layer*2; pos.LowerRightCorner.Y -= 2 * marker.getLayer();
GL32_draw2DRectangle(m.color, pos);
GL32_draw2DRectangle(j->second.getColour(), pos);
// If the mouse cursor is over the marker, get its information // If the mouse cursor is over the marker, get its information
if (pos.isPointInside(mouse_pos)) if (pos.isPointInside(mouse_pos))
hovered_markers.push(m); {
hovered_markers.push(j);
} }
if (m_capture_report) } // for j in AllEventdata
{ } // for i in threads
m_capture_report_buffer->getStdStream() << "\n";
m_first_capture_sweep = false;
}
}
// GPU profiler // GPU profiler
QueryPerf hovered_gpu_marker = Q_LAST; QueryPerf hovered_gpu_marker = Q_LAST;
long hovered_gpu_marker_elapsed = 0; long hovered_gpu_marker_elapsed = 0;
int gpu_y = int(y_offset + nb_thread_infos*line_height + line_height/2); int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
float total = 0; float total = 0;
unsigned int gpu_timers[Q_LAST];
for (unsigned i = 0; i < Q_LAST; i++) for (unsigned i = 0; i < Q_LAST; i++)
{ {
#ifndef SERVER_ONLY #ifndef SERVER_ONLY
gpu_timers[i] = irr_driver->getGPUTimer(i).elapsedTimeus(); int n = irr_driver->getGPUTimer(i).elapsedTimeus();
m_gpu_times[indx*Q_LAST + i] = n;
total += n;
#endif #endif
total += gpu_timers[i];
} }
static video::SColor colors[] = { static video::SColor colors[] = {
@ -395,9 +365,10 @@ void Profiler::draw()
float curr_val = 0; float curr_val = 0;
for (unsigned i = 0; i < Q_LAST; i++) for (unsigned i = 0; i < Q_LAST; i++)
{ {
//Log::info("GPU Perf", "Phase %d : %d us\n", i, irr_driver->getGPUTimer(i).elapsedTimeus()); //Log::info("GPU Perf", "Phase %d : %d us\n", i,
// irr_driver->getGPUTimer(i).elapsedTimeus());
float elapsed = float(gpu_timers[i]); float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width), core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
(s32)(y_offset + gpu_y), (s32)(y_offset + gpu_y),
(s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width), (s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
@ -409,32 +380,23 @@ void Profiler::draw()
if (pos.isPointInside(mouse_pos)) if (pos.isPointInside(mouse_pos))
{ {
hovered_gpu_marker = (QueryPerf)i; hovered_gpu_marker = (QueryPerf)i;
hovered_gpu_marker_elapsed = gpu_timers[i]; hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
} }
if (m_capture_report)
{
if (m_first_gpu_capture_sweep)
m_gpu_capture_report_buffer->getStdStream() << GPU_Phase[i] << ";";
else
m_gpu_capture_report_buffer->getStdStream() << elapsed << ";";
}
} }
if (m_capture_report)
{
m_gpu_capture_report_buffer->getStdStream() << "\n";
m_first_gpu_capture_sweep = false;
}
} }
// Draw the end of the frame // Draw the end of the frame
{ {
s32 x_sync = (s32)(x_offset + factor*m_time_between_sync); s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height); s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
s32 y_down_sync = (s32)( (MARGIN_Y + (2+nb_thread_infos)*LINE_HEIGHT)*screen_size.Height ); s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
* screen_size.Height );
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00), core::rect<s32>(x_sync, y_up_sync, x_sync + 1, y_down_sync)); GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
core::rect<s32>(x_sync, y_up_sync,
x_sync + 1, y_down_sync));
} }
// Draw the hovered markers' names // Draw the hovered markers' names
@ -444,12 +406,13 @@ void Profiler::draw()
core::stringw text; core::stringw text;
while(!hovered_markers.empty()) while(!hovered_markers.empty())
{ {
Marker& m = hovered_markers.top(); AllEventData::iterator j = hovered_markers.top();
const Marker &marker = j->second.getMarker(indx);
std::ostringstream oss; std::ostringstream oss;
oss.precision(4); oss.precision(4);
oss << m.name << " [" << (m.end - m.start) << " ms / "; oss << j->first << " [" << (marker.getDuration()) << " ms / ";
oss.precision(3); oss.precision(3);
oss << (m.end - m.start)*100.0 / duration << "%]" << std::endl; oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
text += oss.str().c_str(); text += oss.str().c_str();
hovered_markers.pop(); hovered_markers.pop();
} }
@ -458,19 +421,16 @@ void Profiler::draw()
if (hovered_gpu_marker != Q_LAST) if (hovered_gpu_marker != Q_LAST)
{ {
std::ostringstream oss; std::ostringstream oss;
oss << GPU_Phase[hovered_gpu_marker] << " : " << hovered_gpu_marker_elapsed << " us"; oss << GPU_Phase[hovered_gpu_marker] << " : "
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00)); << hovered_gpu_marker_elapsed << " us";
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS,
video::SColor(0xFF, 0xFF, 0x00, 0x00));
} }
} }
if (m_capture_report)
{
font->draw("Capturing profiler report...", MARKERS_NAMES_POS, video::SColor(0xFF, 0x00, 0x90, 0x00));
}
PROFILER_POP_CPU_MARKER(); PROFILER_POP_CPU_MARKER();
#endif #endif
} } // draw
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Handle freeze/unfreeze /// Handle freeze/unfreeze
@ -479,7 +439,8 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
video::IVideoDriver* driver = irr_driver->getVideoDriver(); video::IVideoDriver* driver = irr_driver->getVideoDriver();
const core::dimension2d<u32>& screen_size = driver->getScreenSize(); const core::dimension2d<u32>& screen_size = driver->getScreenSize();
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width), core::rect<s32>background_rect(
(int)(MARGIN_X * screen_size.Width),
(int)(MARGIN_Y * screen_size.Height), (int)(MARGIN_Y * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width), (int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) ); (int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) );
@ -504,7 +465,7 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
// Same the other way around. // Same the other way around.
break; break;
} }
} } // onClick
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
/// Helper to draw a white background /// Helper to draw a white background
@ -514,7 +475,8 @@ void Profiler::drawBackground()
video::IVideoDriver* driver = irr_driver->getVideoDriver(); video::IVideoDriver* driver = irr_driver->getVideoDriver();
const core::dimension2d<u32>& screen_size = driver->getScreenSize(); const core::dimension2d<u32>& screen_size = driver->getScreenSize();
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width), core::rect<s32>background_rect(
(int)(MARGIN_X * screen_size.Width),
(int)((MARGIN_Y + 0.25f) * screen_size.Height), (int)((MARGIN_Y + 0.25f) * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width), (int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height) ); (int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height) );
@ -522,4 +484,71 @@ void Profiler::drawBackground()
video::SColor color(0x88, 0xFF, 0xFF, 0xFF); video::SColor color(0x88, 0xFF, 0xFF, 0xFF);
GL32_draw2DRectangle(color, background_rect); GL32_draw2DRectangle(color, background_rect);
#endif #endif
} // drawBackground
//-----------------------------------------------------------------------------
/** Saves the collected profile data to a file. Filename is based on the
* stdout name (with -profile appended).
*/
void Profiler::writeToFile()
{
std::string base_name =
file_manager->getUserConfigFile(file_manager->getStdoutName());
// First CPU data
std::ofstream f(base_name + ".profile-cpu");
for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
{
ThreadData &td = m_all_threads_data[thread_id];
AllEventData::iterator j;
std::vector<std::string> new_headings;
for (j = td.m_all_event_data.begin(); j != td.m_all_event_data.end(); j++)
{
std::vector<std::string>::iterator f =
std::find(m_all_event_names.begin(),
m_all_event_names.end(), j->first);
if(f==m_all_event_names.end())
new_headings.push_back(j->first);
} }
std::sort(new_headings.begin(), new_headings.end());
f << "# \"Thread(1)\" ";
for (unsigned int i = 0; i < new_headings.size(); i++)
f << "\"" << new_headings[i] << "(" << i+2 <<")\" ";
f << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
f << "t" << thread_id << " ";
for (unsigned int i = 0; i < new_headings.size(); i++)
{
const EventData &ed = td.m_all_event_data[new_headings[i]];
f << int(ed.getMarker(start).getDuration()*1000) << " ";
} // for i i new_headings
f << std::endl;
start = (start + 1) % m_max_frames;
}
} // for
f.close();
std::ofstream f_gpu(base_name + ".profile-gpu");
f_gpu << "# ";
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << "\"" << GPU_Phase[i] << "(" << i+1 << ")\" ";
} // for i < Q_LAST
f_gpu << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << m_gpu_times[start*Q_LAST + i] << " ";
} // for i < Q_LAST
f_gpu << std::endl;
start = (start + 1) % m_max_frames;
}
f.close();
} // writeFile

View File

@ -18,15 +18,20 @@
#ifndef PROFILER_HPP #ifndef PROFILER_HPP
#define PROFILER_HPP #define PROFILER_HPP
#include <irrlicht.h> #include "utils/synchronised.hpp"
#include <list>
#include <vector>
#include <stack>
#include <string>
#include <streambuf>
#include <ostream>
#include <iostream>
#include <irrlicht.h>
#include <pthread.h>
#include <assert.h>
#include <iostream>
#include <list>
#include <map>
#include <ostream>
#include <stack>
#include <streambuf>
#include <string>
#include <vector>
enum QueryPerf enum QueryPerf
{ {
@ -70,10 +75,10 @@ double getTimeMilliseconds();
#ifdef ENABLE_PROFILER #ifdef ENABLE_PROFILER
#define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \ #define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \
profiler.pushCpuMarker(name, video::SColor(0xFF, r, g, b)) profiler.pushCPUMarker(name, video::SColor(0xFF, r, g, b))
#define PROFILER_POP_CPU_MARKER() \ #define PROFILER_POP_CPU_MARKER() \
profiler.popCpuMarker() profiler.popCPUMarker()
#define PROFILER_SYNC_FRAME() \ #define PROFILER_SYNC_FRAME() \
profiler.synchronizeFrame() profiler.synchronizeFrame()
@ -89,85 +94,161 @@ double getTimeMilliseconds();
using namespace irr; using namespace irr;
/** For profiling reports, we need a custom strijng stream that writes to a large // ============================================================================
pre-allocated buffer, to avoid allocating as much as possible durign profiling */ /** \brief class that allows run-time graphical profiling through the use
template <typename char_type> * of markers.
struct ostreambuf : public std::basic_streambuf<char_type, std::char_traits<char_type> >
{
ostreambuf(char_type* buffer, std::streamsize bufferLength)
{
// set the "put" pointer the start of the buffer and record it's length.
this->setp(buffer, buffer + bufferLength);
}
};
class StringBuffer
{
private:
char* m_buffer;
ostreambuf<char> ostreamBuffer;
std::ostream messageStream;
public:
StringBuffer(unsigned int size) : m_buffer((char*)calloc(size, 1)), ostreamBuffer(m_buffer, size), messageStream(&ostreamBuffer)
{
}
~StringBuffer()
{
free(m_buffer);
}
std::ostream& getStdStream() { return messageStream; }
char* getRawBuffer() { return m_buffer; }
};
/**
* \brief class that allows run-time graphical profiling through the use of markers
* \ingroup utils * \ingroup utils
*/ */
class Profiler class Profiler
{ {
private: private:
struct Marker // ------------------------------------------------------------------------
class Marker
{ {
double start; // Times of start and end, in milliseconds, private:
double end; // relatively to the time of last synchronization /** An event that is started (pushed) stores the start time in this
size_t layer; * variable. */
double m_start;
std::string name; /** Duration of the event in this frame (accumulated if this event
video::SColor color; * should be recorded more than once). */
Marker(double start, double end, const char* name="N/A", const video::SColor& color=video::SColor(), size_t layer=0) double m_duration;
: start(start), end(end), layer(layer), name(name), color(color) /** Distance of marker from root (for nested events), used to
* adjust vertical height when drawing. */
size_t m_layer;
public:
// --------------------------------------------------------------------
Marker() { m_start = 0; m_duration = 0; m_layer = 0; }
// --------------------------------------------------------------------
Marker(double start, size_t layer=0)
: m_start(start), m_duration(0), m_layer(layer)
{ {
} }
// --------------------------------------------------------------------
Marker(const Marker& ref) Marker(const Marker& ref)
: start(ref.start), end(ref.end), layer(ref.layer), name(ref.name), color(ref.color) : m_start(ref.m_start), m_duration(ref.m_duration),
m_layer(ref.m_layer)
{ {
} }
}; // --------------------------------------------------------------------
/** Returns the start time of this event marker. */
typedef std::list<Marker> MarkerList; double getStart() const { return m_start; }
typedef std::stack<Marker> MarkerStack; // --------------------------------------------------------------------
/** Returns the end time of this event marker. */
struct ThreadInfo double getEnd() const { return m_start+m_duration; }
// --------------------------------------------------------------------
/** Returns the duration of this event. */
double getDuration() const { return m_duration; }
// --------------------------------------------------------------------
size_t getLayer() const { return m_layer; }
// --------------------------------------------------------------------
/** Sets start time and layer for this event. */
void setStart(double start, size_t layer = 0)
{ {
MarkerList markers_done[2]; m_start = start; m_duration = 0; m_layer = layer;
MarkerStack markers_stack[2]; } // setStart
}; // --------------------------------------------------------------------
/** Sets the end time of this event. */
void setEnd(double end)
{
m_duration += (end - m_start);
} // setEnd
typedef std::vector<ThreadInfo> ThreadInfoList; }; // class Marker
ThreadInfoList m_thread_infos; // ========================================================================
int m_write_id; /** The data for one event. It contains the events colours, all markers
* for the buffer period and a stack to detect nesting of markers.
*/
class EventData
{
private:
/** Colour to use in the on-screen display */
video::SColor m_colour;
/** Vector of all buffered markers. */
std::vector<Marker> m_all_markers;
public:
EventData() {}
EventData(video::SColor colour, int max_size)
{
m_all_markers.resize(max_size);
m_colour = colour;
} // EventData
// --------------------------------------------------------------------
/** Records the start of an event for a given frame. */
void setStart(size_t frame, double start, int layer)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setStart(start, layer);
} // setStart
// --------------------------------------------------------------------
/** Records the end of an event for a given frame. */
void setEnd(size_t frame, double end)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setEnd(end);
} // setEnd
// --------------------------------------------------------------------
const Marker& getMarker(int n) const { return m_all_markers[n]; }
// --------------------------------------------------------------------
/** Returns the colour for this event. */
video::SColor getColour() const { return m_colour; }
// --------------------------------------------------------------------
}; // EventData
// ========================================================================
/** The mapping of event names to the corresponding EventData. */
typedef std::map<std::string, EventData> AllEventData;
// ========================================================================
struct ThreadData
{
/** Stack of events to detect nesting. */
std::vector< std::string > m_event_stack;
AllEventData m_all_event_data;
}; // class ThreadData
// ========================================================================
/** Data structure containing all currently buffered markers. The index
* is the thread id. */
std::vector< ThreadData> m_all_threads_data;
/** A mapping of thread_t pointers to a unique integer (starting from 0).*/
Synchronised< std::vector<pthread_t> > m_thread_mapping;
/** Buffer for the GPU times (in ms). */
std::vector<int> m_gpu_times;
/** Counts the threads used, i.e. registered in m_thread_mapping. */
int m_threads_used;
/** Index of the current frame in the buffer. */
int m_current_frame;
/** True if the circular buffer has wrapped around. */
bool m_has_wrapped_around;
/** The maximum number of frames to be buffered. Used to minimise
* reallocations. */
int m_max_frames;
/** Time of last sync. All start/end times are stored relative
* to this time. */
double m_time_last_sync; double m_time_last_sync;
/** Time between now and last sync, used to scale the GUI bar. */
double m_time_between_sync; double m_time_between_sync;
/** List of all event names. This list is sorted to make sure
* if the circular buffer is dumped more than once the order
* of events remains the same. */
std::vector<std::string> m_all_event_names;
// Handling freeze/unfreeze by clicking on the display // Handling freeze/unfreeze by clicking on the display
enum FreezeState enum FreezeState
{ {
@ -179,35 +260,25 @@ private:
FreezeState m_freeze_state; FreezeState m_freeze_state;
bool m_capture_report; private:
bool m_first_capture_sweep; int getThreadID();
bool m_first_gpu_capture_sweep; void drawBackground();
StringBuffer* m_capture_report_buffer;
StringBuffer* m_gpu_capture_report_buffer;
public: public:
Profiler(); Profiler();
virtual ~Profiler(); virtual ~Profiler();
void pushCpuMarker(const char* name="N/A", const video::SColor& color=video::SColor()); void pushCPUMarker(const char* name="N/A",
void popCpuMarker(); const video::SColor& color=video::SColor());
void popCPUMarker();
void synchronizeFrame(); void synchronizeFrame();
void draw(); void draw();
void onClick(const core::vector2di& mouse_pos); void onClick(const core::vector2di& mouse_pos);
void writeToFile();
bool getCaptureReport() const { return m_capture_report; } // ------------------------------------------------------------------------
void setCaptureReport(bool captureReport);
bool isFrozen() const { return m_freeze_state == FROZEN; } bool isFrozen() const { return m_freeze_state == FROZEN; }
protected:
// TODO: detect on which thread this is called to support multithreading
ThreadInfo& getThreadInfo() { return m_thread_infos[0]; }
void drawBackground();
}; };
#endif // PROFILER_HPP #endif // PROFILER_HPP