Very first version of profiler that supports multiple threads
and handles events in branches better.
This commit is contained in:
parent
47d2e30c5f
commit
ea12c8f494
@ -660,6 +660,10 @@ namespace UserConfigParams
|
||||
/** True if graphical profiler should be displayed */
|
||||
PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false );
|
||||
|
||||
/** How many seconds worth of data the circular profile buffer
|
||||
* can store. */
|
||||
PARAM_PREFIX float m_profiler_buffer_duration PARAM_DEFAULT(20.0f);
|
||||
|
||||
// not saved to file
|
||||
|
||||
// ---- Networking
|
||||
|
@ -88,7 +88,7 @@ enum DebugMenuCommand
|
||||
DEBUG_GRAPHICS_BULLET_2,
|
||||
DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ,
|
||||
DEBUG_PROFILER,
|
||||
DEBUG_PROFILER_GENERATE_REPORT,
|
||||
DEBUG_PROFILER_WRITE_REPORT,
|
||||
DEBUG_FONT_DUMP_GLYPH_PAGE,
|
||||
DEBUG_FONT_RELOAD,
|
||||
DEBUG_FPS,
|
||||
@ -349,8 +349,8 @@ bool handleContextMenuAction(s32 cmd_id)
|
||||
UserConfigParams::m_profiler_enabled =
|
||||
!UserConfigParams::m_profiler_enabled;
|
||||
break;
|
||||
case DEBUG_PROFILER_GENERATE_REPORT:
|
||||
profiler.setCaptureReport(!profiler.getCaptureReport());
|
||||
case DEBUG_PROFILER_WRITE_REPORT:
|
||||
profiler.writeToFile();
|
||||
break;
|
||||
case DEBUG_THROTTLE_FPS:
|
||||
main_loop->setThrottleFPS(false);
|
||||
@ -822,8 +822,8 @@ bool onEvent(const SEvent &event)
|
||||
|
||||
mnu->addItem(L"Profiler", DEBUG_PROFILER);
|
||||
if (UserConfigParams::m_profiler_enabled)
|
||||
mnu->addItem(L"Toggle capture profiler report",
|
||||
DEBUG_PROFILER_GENERATE_REPORT);
|
||||
mnu->addItem(L"Save profiler report",
|
||||
DEBUG_PROFILER_WRITE_REPORT);
|
||||
mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS);
|
||||
mnu->addItem(L"Toggle FPS", DEBUG_FPS);
|
||||
mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY);
|
||||
|
@ -16,6 +16,8 @@
|
||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
|
||||
#include "profiler.hpp"
|
||||
|
||||
#include "config/user_config.hpp"
|
||||
#include "graphics/glwrap.hpp"
|
||||
#include "graphics/irr_driver.hpp"
|
||||
#include "graphics/2dutils.hpp"
|
||||
@ -26,11 +28,11 @@
|
||||
#include "io/file_manager.hpp"
|
||||
#include "utils/vs.hpp"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stack>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <ostream>
|
||||
#include <stack>
|
||||
#include <sstream>
|
||||
|
||||
static const char* GPU_Phase[Q_LAST] =
|
||||
{
|
||||
@ -74,7 +76,8 @@ Profiler profiler;
|
||||
#define MARKERS_NAMES_POS core::rect<s32>(50,100,150,200)
|
||||
#define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,250)
|
||||
|
||||
#define TIME_DRAWN_MS 30.0f // the width of the profiler corresponds to TIME_DRAWN_MS milliseconds
|
||||
// The width of the profiler corresponds to TIME_DRAWN_MS milliseconds
|
||||
#define TIME_DRAWN_MS 30.0f
|
||||
|
||||
// --- Begin portable precise timer ---
|
||||
#ifdef WIN32
|
||||
@ -90,7 +93,7 @@ Profiler profiler;
|
||||
LARGE_INTEGER timer;
|
||||
QueryPerformanceCounter(&timer);
|
||||
return double(timer.QuadPart) / perFreq;
|
||||
}
|
||||
} // getTimeMilliseconds
|
||||
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
@ -99,160 +102,152 @@ Profiler profiler;
|
||||
struct timeval tv;
|
||||
gettimeofday(&tv, NULL);
|
||||
return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0);
|
||||
}
|
||||
} // getTimeMilliseconds
|
||||
#endif
|
||||
// --- End portable precise timer ---
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
Profiler::Profiler()
|
||||
{
|
||||
m_thread_infos.resize(1); // TODO: monothread now, should support multithreading
|
||||
m_write_id = 0;
|
||||
m_time_last_sync = getTimeMilliseconds();
|
||||
m_time_between_sync = 0.0;
|
||||
m_freeze_state = UNFROZEN;
|
||||
m_capture_report = false;
|
||||
m_first_capture_sweep = true;
|
||||
m_first_gpu_capture_sweep = true;
|
||||
m_capture_report_buffer = NULL;
|
||||
}
|
||||
m_time_last_sync = getTimeMilliseconds();
|
||||
m_time_between_sync = 0.0;
|
||||
m_freeze_state = UNFROZEN;
|
||||
|
||||
// By limiting the number of threads that can be created, we avoid the
|
||||
// problem that all access to m_all_event_data need to be locked
|
||||
// (otherwise adding a thread to m_all_event_data can trigger a
|
||||
// reallocate, which makes concurrent access invalid)
|
||||
m_max_frames = int( UserConfigParams::m_profiler_buffer_duration
|
||||
* UserConfigParams::m_max_fps );
|
||||
m_current_frame = 0;
|
||||
m_has_wrapped_around = false;
|
||||
m_threads_used = 0;
|
||||
const int MAX_THREADS = 10;
|
||||
m_all_threads_data.resize(MAX_THREADS);
|
||||
m_thread_mapping.getData().resize(MAX_THREADS);
|
||||
m_gpu_times.resize(Q_LAST*m_max_frames);
|
||||
} // Profile
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
Profiler::~Profiler()
|
||||
{
|
||||
}
|
||||
} // ~Profiler
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void Profiler::setCaptureReport(bool captureReport)
|
||||
int Profiler::getThreadID()
|
||||
{
|
||||
if (!m_capture_report && captureReport)
|
||||
m_thread_mapping.lock();
|
||||
pthread_t thread = pthread_self();
|
||||
int i = 0;
|
||||
while(i < m_threads_used)
|
||||
{
|
||||
m_capture_report = true;
|
||||
m_first_capture_sweep = true;
|
||||
m_first_gpu_capture_sweep = true;
|
||||
// TODO: a 20 MB hardcoded buffer for now. That should amply suffice for
|
||||
// all reasonable purposes. But it's not too clean to hardcode
|
||||
m_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
|
||||
m_gpu_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
|
||||
}
|
||||
else if (m_capture_report && !captureReport)
|
||||
{
|
||||
// when disabling capture to file, flush captured data to a file
|
||||
if (memcmp( &m_thread_mapping.getData()[i],
|
||||
&thread,
|
||||
sizeof(thread)) ==0 )
|
||||
{
|
||||
std::ofstream filewriter(file_manager->getUserConfigFile("profiling.csv").c_str(), std::ios::out | std::ios::binary);
|
||||
const char* str = m_capture_report_buffer->getRawBuffer();
|
||||
filewriter.write(str, strlen(str));
|
||||
}
|
||||
{
|
||||
std::ofstream filewriter(file_manager->getUserConfigFile("profiling_gpu.csv").c_str(), std::ios::out | std::ios::binary);
|
||||
const char* str = m_gpu_capture_report_buffer->getRawBuffer();
|
||||
filewriter.write(str, strlen(str));
|
||||
m_thread_mapping.unlock();
|
||||
return i;
|
||||
}
|
||||
i++;
|
||||
} // for i <m_threads_used
|
||||
|
||||
m_capture_report = false;
|
||||
assert(m_threads_used < (int)m_thread_mapping.getData().size());
|
||||
m_thread_mapping.getData()[m_threads_used] = thread;
|
||||
m_threads_used++;
|
||||
m_thread_mapping.unlock();
|
||||
|
||||
delete m_capture_report_buffer;
|
||||
m_capture_report_buffer = NULL;
|
||||
|
||||
delete m_gpu_capture_report_buffer;
|
||||
m_gpu_capture_report_buffer = NULL;
|
||||
}
|
||||
}
|
||||
return m_threads_used - 1;
|
||||
} // getThreadID
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Push a new marker that starts now
|
||||
void Profiler::pushCpuMarker(const char* name, const video::SColor& color)
|
||||
void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
|
||||
{
|
||||
// Don't do anything when frozen
|
||||
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
|
||||
if (m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
|
||||
return;
|
||||
|
||||
ThreadInfo& ti = getThreadInfo();
|
||||
MarkerStack& markers_stack = ti.markers_stack[m_write_id];
|
||||
double start = getTimeMilliseconds() - m_time_last_sync;
|
||||
size_t layer = markers_stack.size();
|
||||
int thread_id = getThreadID();
|
||||
|
||||
// Add to the stack of current markers
|
||||
markers_stack.push(Marker(start, -1.0, name, color, layer));
|
||||
}
|
||||
ThreadData &td = m_all_threads_data[thread_id];
|
||||
AllEventData::iterator i = td.m_all_event_data.find(name);
|
||||
if (i != td.m_all_event_data.end())
|
||||
{
|
||||
i->second.setStart(m_current_frame, start, td.m_event_stack.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
EventData ed(colour, m_max_frames);
|
||||
ed.setStart(m_current_frame, start, td.m_event_stack.size());
|
||||
td.m_all_event_data[name] = ed;
|
||||
}
|
||||
|
||||
td.m_event_stack.push_back(name);
|
||||
} // pushCPUMarker
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Stop the last pushed marker
|
||||
void Profiler::popCpuMarker()
|
||||
void Profiler::popCPUMarker()
|
||||
{
|
||||
// Don't do anything when frozen
|
||||
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
|
||||
return;
|
||||
|
||||
ThreadInfo& ti = getThreadInfo();
|
||||
assert(ti.markers_stack[m_write_id].size() > 0);
|
||||
int thread_id = getThreadID();
|
||||
ThreadData &td = m_all_threads_data[thread_id];
|
||||
|
||||
MarkerStack& markers_stack = ti.markers_stack[m_write_id];
|
||||
MarkerList& markers_done = ti.markers_done[m_write_id];
|
||||
assert(td.m_event_stack.size() > 0);
|
||||
|
||||
// Update the date of end of the marker
|
||||
Marker& marker = markers_stack.top();
|
||||
marker.end = getTimeMilliseconds() - m_time_last_sync;
|
||||
const std::string &name = td.m_event_stack.back();
|
||||
td.m_all_event_data[name].setEnd(m_current_frame,
|
||||
getTimeMilliseconds() - m_time_last_sync);
|
||||
|
||||
// Remove the marker from the stack and add it to the list of markers done
|
||||
markers_done.push_front(marker);
|
||||
markers_stack.pop();
|
||||
}
|
||||
td.m_event_stack.pop_back();
|
||||
} // popCPUMarker
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Swap buffering for the markers
|
||||
/** Saves all data for the current frame, and starts the next frame in the
|
||||
* circular buffer. Any events that are currently active (e.g. in a separate
|
||||
* thread) will be split in two parts: the beginning (till now) in the current
|
||||
* frame, the rest will be added to the next frame.
|
||||
*/
|
||||
void Profiler::synchronizeFrame()
|
||||
{
|
||||
// Don't do anything when frozen
|
||||
if(m_freeze_state == FROZEN)
|
||||
return;
|
||||
|
||||
// Avoid using several times getTimeMilliseconds(), which would yield different results
|
||||
// Avoid using several times getTimeMilliseconds(),
|
||||
// which would yield different results
|
||||
double now = getTimeMilliseconds();
|
||||
|
||||
// Swap buffers
|
||||
int old_write_id = m_write_id;
|
||||
m_write_id = !m_write_id;
|
||||
|
||||
// For each thread:
|
||||
ThreadInfoList::iterator it_end = m_thread_infos.end();
|
||||
for(ThreadInfoList::iterator it = m_thread_infos.begin() ; it != it_end ; it++)
|
||||
// Set index to next frame
|
||||
int next_frame = m_current_frame+1;
|
||||
if (next_frame >= m_max_frames)
|
||||
{
|
||||
// Get the thread information
|
||||
ThreadInfo& ti = *it;
|
||||
|
||||
MarkerList& old_markers_done = ti.markers_done[old_write_id];
|
||||
MarkerStack& old_markers_stack = ti.markers_stack[old_write_id];
|
||||
|
||||
MarkerList& new_markers_done = ti.markers_done[m_write_id];
|
||||
MarkerStack& new_markers_stack = ti.markers_stack[m_write_id];
|
||||
|
||||
// Clear the containers for the new frame
|
||||
new_markers_done.clear();
|
||||
while(!new_markers_stack.empty())
|
||||
new_markers_stack.pop();
|
||||
|
||||
// Finish the markers in the stack of the previous frame
|
||||
// and start them for the next frame.
|
||||
|
||||
// For each marker in the old stack:
|
||||
while(!old_markers_stack.empty())
|
||||
{
|
||||
// - finish the marker for the previous frame and add it to the old "done" list
|
||||
Marker& m = old_markers_stack.top();
|
||||
m.end = now - m_time_last_sync;
|
||||
old_markers_done.push_front(m);
|
||||
|
||||
// - start a new one for the new frame
|
||||
Marker new_marker(0.0, -1.0, m.name.c_str(), m.color);
|
||||
new_markers_stack.push(new_marker);
|
||||
|
||||
// - next iteration
|
||||
old_markers_stack.pop();
|
||||
}
|
||||
next_frame = 0;
|
||||
m_has_wrapped_around = true;
|
||||
}
|
||||
|
||||
// First finish all markers that are currently in progress, and add
|
||||
// a new start marker for the next frame. So e.g. if a thread is busy in
|
||||
// one event while the main thread syncs the frame, this event will get
|
||||
// split into two parts in two consecutive frames
|
||||
for (int i = 0; i < m_threads_used; i++)
|
||||
{
|
||||
ThreadData &td = m_all_threads_data[i];
|
||||
for(unsigned int j=0; j<td.m_event_stack.size(); j++)
|
||||
{
|
||||
EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
|
||||
ed.setEnd(m_current_frame, now);
|
||||
ed.setStart(next_frame, now, j);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
m_current_frame = next_frame;
|
||||
|
||||
// Remember the date of last synchronization
|
||||
m_time_between_sync = now - m_time_last_sync;
|
||||
m_time_last_sync = now;
|
||||
@ -262,7 +257,7 @@ void Profiler::synchronizeFrame()
|
||||
m_freeze_state = FROZEN;
|
||||
else if(m_freeze_state == WAITING_FOR_UNFREEZE)
|
||||
m_freeze_state = UNFROZEN;
|
||||
}
|
||||
} // synchronizeFrame
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Draw the markers
|
||||
@ -271,43 +266,41 @@ void Profiler::draw()
|
||||
#ifndef SERVER_ONLY
|
||||
PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
|
||||
video::IVideoDriver* driver = irr_driver->getVideoDriver();
|
||||
std::stack<Marker> hovered_markers;
|
||||
|
||||
// Current frame points to the frame in which currently data is
|
||||
// being accumulated. Draw the previous (i.e. complete) frame.
|
||||
int indx = m_current_frame - 1;
|
||||
if (indx < 0) indx = m_max_frames - 1;
|
||||
|
||||
drawBackground();
|
||||
|
||||
// Force to show the pointer
|
||||
irr_driver->showPointer();
|
||||
|
||||
int read_id = !m_write_id;
|
||||
|
||||
// Compute some values for drawing (unit: pixels, but we keep floats for reducing errors accumulation)
|
||||
core::dimension2d<u32> screen_size = driver->getScreenSize();
|
||||
// Compute some values for drawing (unit: pixels, but we keep floats
|
||||
// for reducing errors accumulation)
|
||||
core::dimension2d<u32> screen_size = driver->getScreenSize();
|
||||
const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
|
||||
const double x_offset = MARGIN_X*screen_size.Width;
|
||||
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
|
||||
const double line_height = LINE_HEIGHT*screen_size.Height;
|
||||
|
||||
size_t nb_thread_infos = m_thread_infos.size();
|
||||
const double x_offset = MARGIN_X*screen_size.Width;
|
||||
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
|
||||
const double line_height = LINE_HEIGHT*screen_size.Height;
|
||||
|
||||
|
||||
double start = -1.0f;
|
||||
double end = -1.0f;
|
||||
for (size_t i = 0; i < nb_thread_infos; i++)
|
||||
// Compute start and end time for this frame
|
||||
double start = 99999.0f;
|
||||
double end = -1.0f;
|
||||
for (int i = 0; i < m_threads_used; i++)
|
||||
{
|
||||
MarkerList& markers = m_thread_infos[i].markers_done[read_id];
|
||||
|
||||
MarkerList::const_iterator it_end = markers.end();
|
||||
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
|
||||
AllEventData &aed = m_all_threads_data[i].m_all_event_data;
|
||||
AllEventData::iterator j;
|
||||
for (j = aed.begin(); j != aed.end(); ++j)
|
||||
{
|
||||
const Marker& m = *it;
|
||||
const Marker &marker = j->second.getMarker(indx);
|
||||
start = std::min(start, marker.getStart() );
|
||||
end = std::max(end, marker.getEnd() );
|
||||
} // for j in events
|
||||
} // for i in threads
|
||||
|
||||
if (start < 0.0) start = m.start;
|
||||
else start = std::min(start, m.start);
|
||||
|
||||
if (end < 0.0) end = m.end;
|
||||
else end = std::max(end, m.end);
|
||||
}
|
||||
}
|
||||
|
||||
const double duration = end - start;
|
||||
const double factor = profiler_width / duration;
|
||||
@ -315,70 +308,47 @@ void Profiler::draw()
|
||||
// Get the mouse pos
|
||||
core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
|
||||
|
||||
// For each thread:
|
||||
for (size_t i = 0; i < nb_thread_infos; i++)
|
||||
std::stack<AllEventData::iterator> hovered_markers;
|
||||
for (int i = 0; i < m_threads_used; i++)
|
||||
{
|
||||
// Draw all markers
|
||||
MarkerList& markers = m_thread_infos[i].markers_done[read_id];
|
||||
|
||||
if (markers.empty())
|
||||
continue;
|
||||
|
||||
if (m_capture_report)
|
||||
ThreadData &td = m_all_threads_data[i];
|
||||
AllEventData &aed = td.m_all_event_data;
|
||||
AllEventData::iterator j;
|
||||
for (j = aed.begin(); j != aed.end(); ++j)
|
||||
{
|
||||
if (m_first_capture_sweep)
|
||||
m_capture_report_buffer->getStdStream() << "\"Thread\";";
|
||||
else
|
||||
m_capture_report_buffer->getStdStream() << i << ";";
|
||||
}
|
||||
MarkerList::const_iterator it_end = markers.end();
|
||||
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
|
||||
{
|
||||
const Marker& m = *it;
|
||||
assert(m.end >= 0.0);
|
||||
|
||||
if (m_capture_report)
|
||||
{
|
||||
if (m_first_capture_sweep)
|
||||
m_capture_report_buffer->getStdStream() << "\"" << m.name << "\";";
|
||||
else
|
||||
m_capture_report_buffer->getStdStream() << (int)round((m.end - m.start) * 1000) << ";";
|
||||
}
|
||||
core::rect<s32> pos((s32)( x_offset + factor*m.start ),
|
||||
(s32)( y_offset + i*line_height ),
|
||||
(s32)( x_offset + factor*m.end ),
|
||||
(s32)( y_offset + (i+1)*line_height ));
|
||||
const Marker &marker = j->second.getMarker(indx);
|
||||
core::rect<s32> pos((s32)(x_offset + factor*marker.getStart()),
|
||||
(s32)(y_offset + i*line_height),
|
||||
(s32)(x_offset + factor*marker.getEnd()),
|
||||
(s32)(y_offset + (i + 1)*line_height) );
|
||||
|
||||
// Reduce vertically the size of the markers according to their layer
|
||||
pos.UpperLeftCorner.Y += m.layer*2;
|
||||
pos.LowerRightCorner.Y -= m.layer*2;
|
||||
|
||||
GL32_draw2DRectangle(m.color, pos);
|
||||
pos.UpperLeftCorner.Y += 2 * marker.getLayer();
|
||||
pos.LowerRightCorner.Y -= 2 * marker.getLayer();
|
||||
|
||||
GL32_draw2DRectangle(j->second.getColour(), pos);
|
||||
// If the mouse cursor is over the marker, get its information
|
||||
if(pos.isPointInside(mouse_pos))
|
||||
hovered_markers.push(m);
|
||||
}
|
||||
if (pos.isPointInside(mouse_pos))
|
||||
{
|
||||
hovered_markers.push(j);
|
||||
}
|
||||
|
||||
} // for j in AllEventdata
|
||||
} // for i in threads
|
||||
|
||||
if (m_capture_report)
|
||||
{
|
||||
m_capture_report_buffer->getStdStream() << "\n";
|
||||
m_first_capture_sweep = false;
|
||||
}
|
||||
}
|
||||
|
||||
// GPU profiler
|
||||
QueryPerf hovered_gpu_marker = Q_LAST;
|
||||
long hovered_gpu_marker_elapsed = 0;
|
||||
int gpu_y = int(y_offset + nb_thread_infos*line_height + line_height/2);
|
||||
int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
|
||||
float total = 0;
|
||||
unsigned int gpu_timers[Q_LAST];
|
||||
for (unsigned i = 0; i < Q_LAST; i++)
|
||||
{
|
||||
#ifndef SERVER_ONLY
|
||||
gpu_timers[i] = irr_driver->getGPUTimer(i).elapsedTimeus();
|
||||
int n = irr_driver->getGPUTimer(i).elapsedTimeus();
|
||||
m_gpu_times[indx*Q_LAST + i] = n;
|
||||
total += n;
|
||||
#endif
|
||||
total += gpu_timers[i];
|
||||
}
|
||||
|
||||
static video::SColor colors[] = {
|
||||
@ -395,9 +365,10 @@ void Profiler::draw()
|
||||
float curr_val = 0;
|
||||
for (unsigned i = 0; i < Q_LAST; i++)
|
||||
{
|
||||
//Log::info("GPU Perf", "Phase %d : %d us\n", i, irr_driver->getGPUTimer(i).elapsedTimeus());
|
||||
//Log::info("GPU Perf", "Phase %d : %d us\n", i,
|
||||
// irr_driver->getGPUTimer(i).elapsedTimeus());
|
||||
|
||||
float elapsed = float(gpu_timers[i]);
|
||||
float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
|
||||
core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
|
||||
(s32)(y_offset + gpu_y),
|
||||
(s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
|
||||
@ -409,32 +380,23 @@ void Profiler::draw()
|
||||
if (pos.isPointInside(mouse_pos))
|
||||
{
|
||||
hovered_gpu_marker = (QueryPerf)i;
|
||||
hovered_gpu_marker_elapsed = gpu_timers[i];
|
||||
hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
|
||||
}
|
||||
|
||||
if (m_capture_report)
|
||||
{
|
||||
if (m_first_gpu_capture_sweep)
|
||||
m_gpu_capture_report_buffer->getStdStream() << GPU_Phase[i] << ";";
|
||||
else
|
||||
m_gpu_capture_report_buffer->getStdStream() << elapsed << ";";
|
||||
}
|
||||
}
|
||||
|
||||
if (m_capture_report)
|
||||
{
|
||||
m_gpu_capture_report_buffer->getStdStream() << "\n";
|
||||
m_first_gpu_capture_sweep = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Draw the end of the frame
|
||||
{
|
||||
s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
|
||||
s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
|
||||
s32 y_down_sync = (s32)( (MARGIN_Y + (2+nb_thread_infos)*LINE_HEIGHT)*screen_size.Height );
|
||||
s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
|
||||
* screen_size.Height );
|
||||
|
||||
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00), core::rect<s32>(x_sync, y_up_sync, x_sync + 1, y_down_sync));
|
||||
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
|
||||
core::rect<s32>(x_sync, y_up_sync,
|
||||
x_sync + 1, y_down_sync));
|
||||
}
|
||||
|
||||
// Draw the hovered markers' names
|
||||
@ -444,12 +406,13 @@ void Profiler::draw()
|
||||
core::stringw text;
|
||||
while(!hovered_markers.empty())
|
||||
{
|
||||
Marker& m = hovered_markers.top();
|
||||
AllEventData::iterator j = hovered_markers.top();
|
||||
const Marker &marker = j->second.getMarker(indx);
|
||||
std::ostringstream oss;
|
||||
oss.precision(4);
|
||||
oss << m.name << " [" << (m.end - m.start) << " ms / ";
|
||||
oss << j->first << " [" << (marker.getDuration()) << " ms / ";
|
||||
oss.precision(3);
|
||||
oss << (m.end - m.start)*100.0 / duration << "%]" << std::endl;
|
||||
oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
|
||||
text += oss.str().c_str();
|
||||
hovered_markers.pop();
|
||||
}
|
||||
@ -458,19 +421,16 @@ void Profiler::draw()
|
||||
if (hovered_gpu_marker != Q_LAST)
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << GPU_Phase[hovered_gpu_marker] << " : " << hovered_gpu_marker_elapsed << " us";
|
||||
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00));
|
||||
oss << GPU_Phase[hovered_gpu_marker] << " : "
|
||||
<< hovered_gpu_marker_elapsed << " us";
|
||||
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS,
|
||||
video::SColor(0xFF, 0xFF, 0x00, 0x00));
|
||||
}
|
||||
}
|
||||
|
||||
if (m_capture_report)
|
||||
{
|
||||
font->draw("Capturing profiler report...", MARKERS_NAMES_POS, video::SColor(0xFF, 0x00, 0x90, 0x00));
|
||||
}
|
||||
|
||||
PROFILER_POP_CPU_MARKER();
|
||||
#endif
|
||||
}
|
||||
} // draw
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Handle freeze/unfreeze
|
||||
@ -479,10 +439,11 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
|
||||
video::IVideoDriver* driver = irr_driver->getVideoDriver();
|
||||
const core::dimension2d<u32>& screen_size = driver->getScreenSize();
|
||||
|
||||
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width),
|
||||
(int)(MARGIN_Y * screen_size.Height),
|
||||
(int)((1.0-MARGIN_X) * screen_size.Width),
|
||||
(int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height));
|
||||
core::rect<s32>background_rect(
|
||||
(int)(MARGIN_X * screen_size.Width),
|
||||
(int)(MARGIN_Y * screen_size.Height),
|
||||
(int)((1.0-MARGIN_X) * screen_size.Width),
|
||||
(int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) );
|
||||
|
||||
if(!background_rect.isPointInside(mouse_pos))
|
||||
return;
|
||||
@ -504,7 +465,7 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
|
||||
// Same the other way around.
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // onClick
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/// Helper to draw a white background
|
||||
@ -514,12 +475,80 @@ void Profiler::drawBackground()
|
||||
video::IVideoDriver* driver = irr_driver->getVideoDriver();
|
||||
const core::dimension2d<u32>& screen_size = driver->getScreenSize();
|
||||
|
||||
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width),
|
||||
(int)((MARGIN_Y + 0.25f) * screen_size.Height),
|
||||
(int)((1.0-MARGIN_X) * screen_size.Width),
|
||||
(int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height));
|
||||
core::rect<s32>background_rect(
|
||||
(int)(MARGIN_X * screen_size.Width),
|
||||
(int)((MARGIN_Y + 0.25f) * screen_size.Height),
|
||||
(int)((1.0-MARGIN_X) * screen_size.Width),
|
||||
(int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height) );
|
||||
|
||||
video::SColor color(0x88, 0xFF, 0xFF, 0xFF);
|
||||
GL32_draw2DRectangle(color, background_rect);
|
||||
#endif
|
||||
}
|
||||
} // drawBackground
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
/** Saves the collected profile data to a file. Filename is based on the
|
||||
* stdout name (with -profile appended).
|
||||
*/
|
||||
void Profiler::writeToFile()
|
||||
{
|
||||
std::string base_name =
|
||||
file_manager->getUserConfigFile(file_manager->getStdoutName());
|
||||
// First CPU data
|
||||
std::ofstream f(base_name + ".profile-cpu");
|
||||
for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
|
||||
{
|
||||
ThreadData &td = m_all_threads_data[thread_id];
|
||||
AllEventData::iterator j;
|
||||
std::vector<std::string> new_headings;
|
||||
for (j = td.m_all_event_data.begin(); j != td.m_all_event_data.end(); j++)
|
||||
{
|
||||
std::vector<std::string>::iterator f =
|
||||
std::find(m_all_event_names.begin(),
|
||||
m_all_event_names.end(), j->first);
|
||||
if(f==m_all_event_names.end())
|
||||
new_headings.push_back(j->first);
|
||||
}
|
||||
std::sort(new_headings.begin(), new_headings.end());
|
||||
f << "# \"Thread(1)\" ";
|
||||
for (unsigned int i = 0; i < new_headings.size(); i++)
|
||||
f << "\"" << new_headings[i] << "(" << i+2 <<")\" ";
|
||||
f << std::endl;
|
||||
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
|
||||
if (start > m_max_frames) start -= m_max_frames;
|
||||
while (start != m_current_frame)
|
||||
{
|
||||
f << "t" << thread_id << " ";
|
||||
for (unsigned int i = 0; i < new_headings.size(); i++)
|
||||
{
|
||||
const EventData &ed = td.m_all_event_data[new_headings[i]];
|
||||
f << int(ed.getMarker(start).getDuration()*1000) << " ";
|
||||
} // for i i new_headings
|
||||
f << std::endl;
|
||||
start = (start + 1) % m_max_frames;
|
||||
}
|
||||
} // for
|
||||
f.close();
|
||||
|
||||
std::ofstream f_gpu(base_name + ".profile-gpu");
|
||||
f_gpu << "# ";
|
||||
|
||||
for (unsigned i = 0; i < Q_LAST; i++)
|
||||
{
|
||||
f_gpu << "\"" << GPU_Phase[i] << "(" << i+1 << ")\" ";
|
||||
} // for i < Q_LAST
|
||||
f_gpu << std::endl;
|
||||
|
||||
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
|
||||
if (start > m_max_frames) start -= m_max_frames;
|
||||
while (start != m_current_frame)
|
||||
{
|
||||
for (unsigned i = 0; i < Q_LAST; i++)
|
||||
{
|
||||
f_gpu << m_gpu_times[start*Q_LAST + i] << " ";
|
||||
} // for i < Q_LAST
|
||||
f_gpu << std::endl;
|
||||
start = (start + 1) % m_max_frames;
|
||||
}
|
||||
f.close();
|
||||
} // writeFile
|
||||
|
@ -18,15 +18,20 @@
|
||||
#ifndef PROFILER_HPP
|
||||
#define PROFILER_HPP
|
||||
|
||||
#include <irrlicht.h>
|
||||
#include <list>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <streambuf>
|
||||
#include <ostream>
|
||||
#include <iostream>
|
||||
#include "utils/synchronised.hpp"
|
||||
|
||||
#include <irrlicht.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <ostream>
|
||||
#include <stack>
|
||||
#include <streambuf>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
enum QueryPerf
|
||||
{
|
||||
@ -70,10 +75,10 @@ double getTimeMilliseconds();
|
||||
|
||||
#ifdef ENABLE_PROFILER
|
||||
#define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \
|
||||
profiler.pushCpuMarker(name, video::SColor(0xFF, r, g, b))
|
||||
profiler.pushCPUMarker(name, video::SColor(0xFF, r, g, b))
|
||||
|
||||
#define PROFILER_POP_CPU_MARKER() \
|
||||
profiler.popCpuMarker()
|
||||
profiler.popCPUMarker()
|
||||
|
||||
#define PROFILER_SYNC_FRAME() \
|
||||
profiler.synchronizeFrame()
|
||||
@ -89,84 +94,160 @@ double getTimeMilliseconds();
|
||||
|
||||
using namespace irr;
|
||||
|
||||
/** For profiling reports, we need a custom string stream that writes to a large
|
||||
pre-allocated buffer, to avoid allocating as much as possible during profiling */
|
||||
template <typename char_type>
|
||||
struct ostreambuf : public std::basic_streambuf<char_type, std::char_traits<char_type> >
|
||||
{
|
||||
ostreambuf(char_type* buffer, std::streamsize bufferLength)
|
||||
{
|
||||
// Set the "put" pointer to the start of the buffer and record its length.
|
||||
this->setp(buffer, buffer + bufferLength);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class StringBuffer
|
||||
{
|
||||
private:
|
||||
char* m_buffer;
|
||||
ostreambuf<char> ostreamBuffer;
|
||||
std::ostream messageStream;
|
||||
|
||||
public:
|
||||
|
||||
StringBuffer(unsigned int size) : m_buffer((char*)calloc(size, 1)), ostreamBuffer(m_buffer, size), messageStream(&ostreamBuffer)
|
||||
{
|
||||
}
|
||||
|
||||
~StringBuffer()
|
||||
{
|
||||
free(m_buffer);
|
||||
}
|
||||
|
||||
std::ostream& getStdStream() { return messageStream; }
|
||||
|
||||
char* getRawBuffer() { return m_buffer; }
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief class that allows run-time graphical profiling through the use of markers
|
||||
* \ingroup utils
|
||||
*/
|
||||
// ============================================================================
|
||||
/** \brief class that allows run-time graphical profiling through the use
|
||||
* of markers.
|
||||
* \ingroup utils
|
||||
*/
|
||||
class Profiler
|
||||
{
|
||||
private:
|
||||
struct Marker
|
||||
// ------------------------------------------------------------------------
|
||||
class Marker
|
||||
{
|
||||
double start; // Times of start and end, in milliseconds,
|
||||
double end; // relatively to the time of last synchronization
|
||||
size_t layer;
|
||||
private:
|
||||
/** An event that is started (pushed) stores the start time in this
|
||||
* variable. */
|
||||
double m_start;
|
||||
|
||||
std::string name;
|
||||
video::SColor color;
|
||||
/** Duration of the event in this frame (accumulated if this event
|
||||
* should be recorded more than once). */
|
||||
|
||||
Marker(double start, double end, const char* name="N/A", const video::SColor& color=video::SColor(), size_t layer=0)
|
||||
: start(start), end(end), layer(layer), name(name), color(color)
|
||||
double m_duration;
|
||||
/** Distance of marker from root (for nested events), used to
|
||||
* adjust vertical height when drawing. */
|
||||
size_t m_layer;
|
||||
public:
|
||||
// --------------------------------------------------------------------
|
||||
Marker() { m_start = 0; m_duration = 0; m_layer = 0; }
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
Marker(double start, size_t layer=0)
|
||||
: m_start(start), m_duration(0), m_layer(layer)
|
||||
{
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
Marker(const Marker& ref)
|
||||
: start(ref.start), end(ref.end), layer(ref.layer), name(ref.name), color(ref.color)
|
||||
: m_start(ref.m_start), m_duration(ref.m_duration),
|
||||
m_layer(ref.m_layer)
|
||||
{
|
||||
}
|
||||
};
|
||||
// --------------------------------------------------------------------
|
||||
/** Returns the start time of this event marker. */
|
||||
double getStart() const { return m_start; }
|
||||
// --------------------------------------------------------------------
|
||||
/** Returns the end time of this event marker. */
|
||||
double getEnd() const { return m_start+m_duration; }
|
||||
// --------------------------------------------------------------------
|
||||
/** Returns the duration of this event. */
|
||||
double getDuration() const { return m_duration; }
|
||||
// --------------------------------------------------------------------
|
||||
size_t getLayer() const { return m_layer; }
|
||||
// --------------------------------------------------------------------
|
||||
/** Sets start time and layer for this event. */
|
||||
void setStart(double start, size_t layer = 0)
|
||||
{
|
||||
m_start = start; m_duration = 0; m_layer = layer;
|
||||
} // setStart
|
||||
// --------------------------------------------------------------------
|
||||
/** Records the end of this event by adding (end - start) to its duration. */
|
||||
void setEnd(double end)
|
||||
{
|
||||
m_duration += (end - m_start);
|
||||
} // setEnd
|
||||
|
||||
typedef std::list<Marker> MarkerList;
|
||||
typedef std::stack<Marker> MarkerStack;
|
||||
}; // class Marker
|
||||
|
||||
struct ThreadInfo
|
||||
// ========================================================================
|
||||
/** The data for one event. It contains the event's colour and all markers
|
||||
* for the buffer period; the stack used to detect nesting is kept per thread.
|
||||
*/
|
||||
class EventData
|
||||
{
|
||||
MarkerList markers_done[2];
|
||||
MarkerStack markers_stack[2];
|
||||
};
|
||||
private:
|
||||
/** Colour to use in the on-screen display */
|
||||
video::SColor m_colour;
|
||||
|
||||
typedef std::vector<ThreadInfo> ThreadInfoList;
|
||||
/** Vector of all buffered markers. */
|
||||
std::vector<Marker> m_all_markers;
|
||||
|
||||
ThreadInfoList m_thread_infos;
|
||||
int m_write_id;
|
||||
double m_time_last_sync;
|
||||
double m_time_between_sync;
|
||||
public:
|
||||
EventData() {}
|
||||
EventData(video::SColor colour, int max_size)
    : m_colour(colour), m_all_markers(max_size)
{
    // The marker buffer is created up front with max_size
    // default-constructed entries, so it never has to grow later.
}   // EventData
|
||||
// --------------------------------------------------------------------
|
||||
/** Records the start of an event for a given frame. */
|
||||
void setStart(size_t frame, double start, int layer)
|
||||
{
|
||||
assert(frame < m_all_markers.capacity());
|
||||
m_all_markers[frame].setStart(start, layer);
|
||||
} // setStart
|
||||
// --------------------------------------------------------------------
|
||||
/** Records the end of an event for a given frame. */
|
||||
void setEnd(size_t frame, double end)
|
||||
{
|
||||
assert(frame < m_all_markers.capacity());
|
||||
m_all_markers[frame].setEnd(end);
|
||||
} // setEnd
|
||||
// --------------------------------------------------------------------
|
||||
const Marker& getMarker(int n) const
{
    // n must address an existing marker: operator[] does no range
    // checking, and a negative n would wrap to a huge index.
    assert(n >= 0 && static_cast<size_t>(n) < m_all_markers.size());
    return m_all_markers[n];
}   // getMarker
|
||||
// --------------------------------------------------------------------
|
||||
/** Returns the colour for this event. */
|
||||
video::SColor getColour() const { return m_colour; }
|
||||
// --------------------------------------------------------------------
|
||||
}; // EventData
|
||||
|
||||
// ========================================================================
|
||||
/** The mapping of event names to the corresponding EventData. */
|
||||
typedef std::map<std::string, EventData> AllEventData;
|
||||
// ========================================================================
|
||||
/** Profiling data of a single thread: the stack of currently open events
 *  and the data of all events recorded for this thread. */
struct ThreadData
|
||||
{
|
||||
/** Stack of events to detect nesting. */
|
||||
std::vector< std::string > m_event_stack;
|
||||
|
||||
/** Data of all events of this thread, keyed by event name. */
AllEventData m_all_event_data;
|
||||
}; // struct ThreadData
|
||||
|
||||
// ========================================================================
|
||||
|
||||
/** Data structure containing all currently buffered markers. The index
|
||||
* is the thread id. */
|
||||
std::vector< ThreadData> m_all_threads_data;
|
||||
|
||||
/** A mapping of pthread_t thread ids to a unique integer (starting from 0).*/
|
||||
Synchronised< std::vector<pthread_t> > m_thread_mapping;
|
||||
|
||||
/** Buffer for the GPU times (in ms). */
|
||||
std::vector<int> m_gpu_times;
|
||||
|
||||
/** Counts the threads used, i.e. registered in m_thread_mapping. */
|
||||
int m_threads_used;
|
||||
|
||||
/** Index of the current frame in the buffer. */
|
||||
int m_current_frame;
|
||||
|
||||
/** True if the circular buffer has wrapped around. */
|
||||
bool m_has_wrapped_around;
|
||||
|
||||
/** The maximum number of frames to be buffered. Used to minimise
|
||||
* reallocations. */
|
||||
int m_max_frames;
|
||||
|
||||
/** Time of last sync. All start/end times are stored relative
|
||||
* to this time. */
|
||||
double m_time_last_sync;
|
||||
|
||||
/** Time between now and last sync, used to scale the GUI bar. */
|
||||
double m_time_between_sync;
|
||||
|
||||
/** List of all event names. This list is sorted to make sure
|
||||
* if the circular buffer is dumped more than once the order
|
||||
* of events remains the same. */
|
||||
std::vector<std::string> m_all_event_names;
|
||||
|
||||
// Handling freeze/unfreeze by clicking on the display
|
||||
enum FreezeState
|
||||
@ -179,35 +260,25 @@ private:
|
||||
|
||||
FreezeState m_freeze_state;
|
||||
|
||||
bool m_capture_report;
|
||||
bool m_first_capture_sweep;
|
||||
bool m_first_gpu_capture_sweep;
|
||||
StringBuffer* m_capture_report_buffer;
|
||||
StringBuffer* m_gpu_capture_report_buffer;
|
||||
private:
|
||||
int getThreadID();
|
||||
void drawBackground();
|
||||
|
||||
public:
|
||||
Profiler();
|
||||
Profiler();
|
||||
virtual ~Profiler();
|
||||
|
||||
void pushCpuMarker(const char* name="N/A", const video::SColor& color=video::SColor());
|
||||
void popCpuMarker();
|
||||
void synchronizeFrame();
|
||||
|
||||
void draw();
|
||||
|
||||
void onClick(const core::vector2di& mouse_pos);
|
||||
|
||||
bool getCaptureReport() const { return m_capture_report; }
|
||||
void setCaptureReport(bool captureReport);
|
||||
void pushCPUMarker(const char* name="N/A",
|
||||
const video::SColor& color=video::SColor());
|
||||
void popCPUMarker();
|
||||
void synchronizeFrame();
|
||||
void draw();
|
||||
void onClick(const core::vector2di& mouse_pos);
|
||||
void writeToFile();
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
bool isFrozen() const { return m_freeze_state == FROZEN; }
|
||||
|
||||
protected:
|
||||
// TODO: detect on which thread this is called to support multithreading
|
||||
ThreadInfo& getThreadInfo() { return m_thread_infos[0]; }
|
||||
void drawBackground();
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // PROFILER_HPP
|
||||
|
Loading…
Reference in New Issue
Block a user