Very first version of profiler that supports multiple threads

and handles events in branches better.
This commit is contained in:
hiker 2017-07-31 22:56:47 +10:00
parent 47d2e30c5f
commit ea12c8f494
4 changed files with 423 additions and 319 deletions

View File

@ -660,6 +660,10 @@ namespace UserConfigParams
/** True if graphical profiler should be displayed */
PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false );
/** How many seconds worth of data the circular profile buffer
* can store. */
PARAM_PREFIX float m_profiler_buffer_duration PARAM_DEFAULT(20.0f);
// not saved to file
// ---- Networking

View File

@ -88,7 +88,7 @@ enum DebugMenuCommand
DEBUG_GRAPHICS_BULLET_2,
DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ,
DEBUG_PROFILER,
DEBUG_PROFILER_GENERATE_REPORT,
DEBUG_PROFILER_WRITE_REPORT,
DEBUG_FONT_DUMP_GLYPH_PAGE,
DEBUG_FONT_RELOAD,
DEBUG_FPS,
@ -349,8 +349,8 @@ bool handleContextMenuAction(s32 cmd_id)
UserConfigParams::m_profiler_enabled =
!UserConfigParams::m_profiler_enabled;
break;
case DEBUG_PROFILER_GENERATE_REPORT:
profiler.setCaptureReport(!profiler.getCaptureReport());
case DEBUG_PROFILER_WRITE_REPORT:
profiler.writeToFile();
break;
case DEBUG_THROTTLE_FPS:
main_loop->setThrottleFPS(false);
@ -822,8 +822,8 @@ bool onEvent(const SEvent &event)
mnu->addItem(L"Profiler", DEBUG_PROFILER);
if (UserConfigParams::m_profiler_enabled)
mnu->addItem(L"Toggle capture profiler report",
DEBUG_PROFILER_GENERATE_REPORT);
mnu->addItem(L"Save profiler report",
DEBUG_PROFILER_WRITE_REPORT);
mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS);
mnu->addItem(L"Toggle FPS", DEBUG_FPS);
mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY);

View File

@ -16,6 +16,8 @@
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "profiler.hpp"
#include "config/user_config.hpp"
#include "graphics/glwrap.hpp"
#include "graphics/irr_driver.hpp"
#include "graphics/2dutils.hpp"
@ -26,11 +28,11 @@
#include "io/file_manager.hpp"
#include "utils/vs.hpp"
#include <assert.h>
#include <stack>
#include <sstream>
#include <algorithm>
#include <fstream>
#include <ostream>
#include <stack>
#include <sstream>
static const char* GPU_Phase[Q_LAST] =
{
@ -74,7 +76,8 @@ Profiler profiler;
#define MARKERS_NAMES_POS core::rect<s32>(50,100,150,200)
#define GPU_MARKERS_NAMES_POS core::rect<s32>(50,165,150,250)
#define TIME_DRAWN_MS 30.0f // the width of the profiler corresponds to TIME_DRAWN_MS milliseconds
// The width of the profiler corresponds to TIME_DRAWN_MS milliseconds
#define TIME_DRAWN_MS 30.0f
// --- Begin portable precise timer ---
#ifdef WIN32
@ -90,7 +93,7 @@ Profiler profiler;
LARGE_INTEGER timer;
QueryPerformanceCounter(&timer);
return double(timer.QuadPart) / perFreq;
}
} // getTimeMilliseconds
#else
#include <sys/time.h>
@ -99,160 +102,152 @@ Profiler profiler;
struct timeval tv;
gettimeofday(&tv, NULL);
return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0);
}
} // getTimeMilliseconds
#endif
// --- End portable precise timer ---
//-----------------------------------------------------------------------------
Profiler::Profiler()
{
m_thread_infos.resize(1); // TODO: monothread now, should support multithreading
m_write_id = 0;
m_time_last_sync = getTimeMilliseconds();
m_time_between_sync = 0.0;
m_freeze_state = UNFROZEN;
m_capture_report = false;
m_first_capture_sweep = true;
m_first_gpu_capture_sweep = true;
m_capture_report_buffer = NULL;
}
m_time_last_sync = getTimeMilliseconds();
m_time_between_sync = 0.0;
m_freeze_state = UNFROZEN;
// By limiting the number of threads that can be created, we avoid the
// problem that all access to m_all_event_data need to be locked
// (otherwise adding a thread to m_all_event_data can trigger a
// reallocate, which makes concurrent access invalid)
m_max_frames = int( UserConfigParams::m_profiler_buffer_duration
* UserConfigParams::m_max_fps );
m_current_frame = 0;
m_has_wrapped_around = false;
m_threads_used = 0;
const int MAX_THREADS = 10;
m_all_threads_data.resize(MAX_THREADS);
m_thread_mapping.getData().resize(MAX_THREADS);
m_gpu_times.resize(Q_LAST*m_max_frames);
} // Profile
//-----------------------------------------------------------------------------
Profiler::~Profiler()
{
}
} // ~Profiler
//-----------------------------------------------------------------------------
void Profiler::setCaptureReport(bool captureReport)
int Profiler::getThreadID()
{
if (!m_capture_report && captureReport)
m_thread_mapping.lock();
pthread_t thread = pthread_self();
int i = 0;
while(i < m_threads_used)
{
m_capture_report = true;
m_first_capture_sweep = true;
m_first_gpu_capture_sweep = true;
// TODO: a 20 MB hardcoded buffer for now. That should amply suffice for
// all reasonable purposes. But it's not too clean to hardcode
m_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
m_gpu_capture_report_buffer = new StringBuffer(20 * 1024 * 1024);
}
else if (m_capture_report && !captureReport)
{
// when disabling capture to file, flush captured data to a file
if (memcmp( &m_thread_mapping.getData()[i],
&thread,
sizeof(thread)) ==0 )
{
std::ofstream filewriter(file_manager->getUserConfigFile("profiling.csv").c_str(), std::ios::out | std::ios::binary);
const char* str = m_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
}
{
std::ofstream filewriter(file_manager->getUserConfigFile("profiling_gpu.csv").c_str(), std::ios::out | std::ios::binary);
const char* str = m_gpu_capture_report_buffer->getRawBuffer();
filewriter.write(str, strlen(str));
m_thread_mapping.unlock();
return i;
}
i++;
} // for i <m_threads_used
m_capture_report = false;
assert(m_threads_used < (int)m_thread_mapping.getData().size());
m_thread_mapping.getData()[m_threads_used] = thread;
m_threads_used++;
m_thread_mapping.unlock();
delete m_capture_report_buffer;
m_capture_report_buffer = NULL;
delete m_gpu_capture_report_buffer;
m_gpu_capture_report_buffer = NULL;
}
}
return m_threads_used - 1;
} // getThreadID
//-----------------------------------------------------------------------------
/// Push a new marker that starts now
void Profiler::pushCpuMarker(const char* name, const video::SColor& color)
void Profiler::pushCPUMarker(const char* name, const video::SColor& colour)
{
// Don't do anything when frozen
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
if (m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
return;
ThreadInfo& ti = getThreadInfo();
MarkerStack& markers_stack = ti.markers_stack[m_write_id];
double start = getTimeMilliseconds() - m_time_last_sync;
size_t layer = markers_stack.size();
int thread_id = getThreadID();
// Add to the stack of current markers
markers_stack.push(Marker(start, -1.0, name, color, layer));
}
ThreadData &td = m_all_threads_data[thread_id];
AllEventData::iterator i = td.m_all_event_data.find(name);
if (i != td.m_all_event_data.end())
{
i->second.setStart(m_current_frame, start, td.m_event_stack.size());
}
else
{
EventData ed(colour, m_max_frames);
ed.setStart(m_current_frame, start, td.m_event_stack.size());
td.m_all_event_data[name] = ed;
}
td.m_event_stack.push_back(name);
} // pushCPUMarker
//-----------------------------------------------------------------------------
/// Stop the last pushed marker
void Profiler::popCpuMarker()
void Profiler::popCPUMarker()
{
// Don't do anything when frozen
if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE)
return;
ThreadInfo& ti = getThreadInfo();
assert(ti.markers_stack[m_write_id].size() > 0);
int thread_id = getThreadID();
ThreadData &td = m_all_threads_data[thread_id];
MarkerStack& markers_stack = ti.markers_stack[m_write_id];
MarkerList& markers_done = ti.markers_done[m_write_id];
assert(td.m_event_stack.size() > 0);
// Update the date of end of the marker
Marker& marker = markers_stack.top();
marker.end = getTimeMilliseconds() - m_time_last_sync;
const std::string &name = td.m_event_stack.back();
td.m_all_event_data[name].setEnd(m_current_frame,
getTimeMilliseconds() - m_time_last_sync);
// Remove the marker from the stack and add it to the list of markers done
markers_done.push_front(marker);
markers_stack.pop();
}
td.m_event_stack.pop_back();
} // popCPUMarker
//-----------------------------------------------------------------------------
/// Swap buffering for the markers
/** Saves all data for the current frame, and starts the next frame in the
* circular buffer. Any events that are currently active (e.g. in a separate
* thread) will be split in two parts: the beginning (till now) in the current
* frame, the rest will be added to the next frame.
*/
void Profiler::synchronizeFrame()
{
// Don't do anything when frozen
if(m_freeze_state == FROZEN)
return;
// Avoid using several times getTimeMilliseconds(), which would yield different results
// Avoid using several times getTimeMilliseconds(),
// which would yield different results
double now = getTimeMilliseconds();
// Swap buffers
int old_write_id = m_write_id;
m_write_id = !m_write_id;
// For each thread:
ThreadInfoList::iterator it_end = m_thread_infos.end();
for(ThreadInfoList::iterator it = m_thread_infos.begin() ; it != it_end ; it++)
// Set index to next frame
int next_frame = m_current_frame+1;
if (next_frame >= m_max_frames)
{
// Get the thread information
ThreadInfo& ti = *it;
MarkerList& old_markers_done = ti.markers_done[old_write_id];
MarkerStack& old_markers_stack = ti.markers_stack[old_write_id];
MarkerList& new_markers_done = ti.markers_done[m_write_id];
MarkerStack& new_markers_stack = ti.markers_stack[m_write_id];
// Clear the containers for the new frame
new_markers_done.clear();
while(!new_markers_stack.empty())
new_markers_stack.pop();
// Finish the markers in the stack of the previous frame
// and start them for the next frame.
// For each marker in the old stack:
while(!old_markers_stack.empty())
{
// - finish the marker for the previous frame and add it to the old "done" list
Marker& m = old_markers_stack.top();
m.end = now - m_time_last_sync;
old_markers_done.push_front(m);
// - start a new one for the new frame
Marker new_marker(0.0, -1.0, m.name.c_str(), m.color);
new_markers_stack.push(new_marker);
// - next iteration
old_markers_stack.pop();
}
next_frame = 0;
m_has_wrapped_around = true;
}
// First finish all markers that are currently in progress, and add
// a new start marker for the next frame. So e.g. if a thread is busy in
// one event while the main thread syncs the frame, this event will get
// split into two parts in two consecutive frames
for (int i = 0; i < m_threads_used; i++)
{
ThreadData &td = m_all_threads_data[i];
for(unsigned int j=0; j<td.m_event_stack.size(); j++)
{
EventData &ed = td.m_all_event_data[td.m_event_stack[j]];
ed.setEnd(m_current_frame, now);
ed.setStart(next_frame, now, j);
}
}
m_current_frame = next_frame;
// Remember the date of last synchronization
m_time_between_sync = now - m_time_last_sync;
m_time_last_sync = now;
@ -262,7 +257,7 @@ void Profiler::synchronizeFrame()
m_freeze_state = FROZEN;
else if(m_freeze_state == WAITING_FOR_UNFREEZE)
m_freeze_state = UNFROZEN;
}
} // synchronizeFrame
//-----------------------------------------------------------------------------
/// Draw the markers
@ -271,43 +266,41 @@ void Profiler::draw()
#ifndef SERVER_ONLY
PROFILER_PUSH_CPU_MARKER("ProfilerDraw", 0xFF, 0xFF, 0x00);
video::IVideoDriver* driver = irr_driver->getVideoDriver();
std::stack<Marker> hovered_markers;
// Current frame points to the frame in which currently data is
// being accumulated. Draw the previous (i.e. complete) frame.
int indx = m_current_frame - 1;
if (indx < 0) indx = m_max_frames - 1;
drawBackground();
// Force to show the pointer
irr_driver->showPointer();
int read_id = !m_write_id;
// Compute some values for drawing (unit: pixels, but we keep floats for reducing errors accumulation)
core::dimension2d<u32> screen_size = driver->getScreenSize();
// Compute some values for drawing (unit: pixels, but we keep floats
// for reducing errors accumulation)
core::dimension2d<u32> screen_size = driver->getScreenSize();
const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width;
const double x_offset = MARGIN_X*screen_size.Width;
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
const double line_height = LINE_HEIGHT*screen_size.Height;
size_t nb_thread_infos = m_thread_infos.size();
const double x_offset = MARGIN_X*screen_size.Width;
const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height;
const double line_height = LINE_HEIGHT*screen_size.Height;
double start = -1.0f;
double end = -1.0f;
for (size_t i = 0; i < nb_thread_infos; i++)
// Compute start end end time for this frame
double start = 99999.0f;
double end = -1.0f;
for (int i = 0; i < m_threads_used; i++)
{
MarkerList& markers = m_thread_infos[i].markers_done[read_id];
MarkerList::const_iterator it_end = markers.end();
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
AllEventData &aed = m_all_threads_data[i].m_all_event_data;
AllEventData::iterator j;
for (j = aed.begin(); j != aed.end(); ++j)
{
const Marker& m = *it;
const Marker &marker = j->second.getMarker(indx);
start = std::min(start, marker.getStart() );
end = std::max(end, marker.getEnd() );
} // for j in events
} // for i in threads
if (start < 0.0) start = m.start;
else start = std::min(start, m.start);
if (end < 0.0) end = m.end;
else end = std::max(end, m.end);
}
}
const double duration = end - start;
const double factor = profiler_width / duration;
@ -315,70 +308,47 @@ void Profiler::draw()
// Get the mouse pos
core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos();
// For each thread:
for (size_t i = 0; i < nb_thread_infos; i++)
std::stack<AllEventData::iterator> hovered_markers;
for (int i = 0; i < m_threads_used; i++)
{
// Draw all markers
MarkerList& markers = m_thread_infos[i].markers_done[read_id];
if (markers.empty())
continue;
if (m_capture_report)
ThreadData &td = m_all_threads_data[i];
AllEventData &aed = td.m_all_event_data;
AllEventData::iterator j;
for (j = aed.begin(); j != aed.end(); ++j)
{
if (m_first_capture_sweep)
m_capture_report_buffer->getStdStream() << "\"Thread\";";
else
m_capture_report_buffer->getStdStream() << i << ";";
}
MarkerList::const_iterator it_end = markers.end();
for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++)
{
const Marker& m = *it;
assert(m.end >= 0.0);
if (m_capture_report)
{
if (m_first_capture_sweep)
m_capture_report_buffer->getStdStream() << "\"" << m.name << "\";";
else
m_capture_report_buffer->getStdStream() << (int)round((m.end - m.start) * 1000) << ";";
}
core::rect<s32> pos((s32)( x_offset + factor*m.start ),
(s32)( y_offset + i*line_height ),
(s32)( x_offset + factor*m.end ),
(s32)( y_offset + (i+1)*line_height ));
const Marker &marker = j->second.getMarker(indx);
core::rect<s32> pos((s32)(x_offset + factor*marker.getStart()),
(s32)(y_offset + i*line_height),
(s32)(x_offset + factor*marker.getEnd()),
(s32)(y_offset + (i + 1)*line_height) );
// Reduce vertically the size of the markers according to their layer
pos.UpperLeftCorner.Y += m.layer*2;
pos.LowerRightCorner.Y -= m.layer*2;
GL32_draw2DRectangle(m.color, pos);
pos.UpperLeftCorner.Y += 2 * marker.getLayer();
pos.LowerRightCorner.Y -= 2 * marker.getLayer();
GL32_draw2DRectangle(j->second.getColour(), pos);
// If the mouse cursor is over the marker, get its information
if(pos.isPointInside(mouse_pos))
hovered_markers.push(m);
}
if (pos.isPointInside(mouse_pos))
{
hovered_markers.push(j);
}
} // for j in AllEventdata
} // for i in threads
if (m_capture_report)
{
m_capture_report_buffer->getStdStream() << "\n";
m_first_capture_sweep = false;
}
}
// GPU profiler
QueryPerf hovered_gpu_marker = Q_LAST;
long hovered_gpu_marker_elapsed = 0;
int gpu_y = int(y_offset + nb_thread_infos*line_height + line_height/2);
int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2);
float total = 0;
unsigned int gpu_timers[Q_LAST];
for (unsigned i = 0; i < Q_LAST; i++)
{
#ifndef SERVER_ONLY
gpu_timers[i] = irr_driver->getGPUTimer(i).elapsedTimeus();
int n = irr_driver->getGPUTimer(i).elapsedTimeus();
m_gpu_times[indx*Q_LAST + i] = n;
total += n;
#endif
total += gpu_timers[i];
}
static video::SColor colors[] = {
@ -395,9 +365,10 @@ void Profiler::draw()
float curr_val = 0;
for (unsigned i = 0; i < Q_LAST; i++)
{
//Log::info("GPU Perf", "Phase %d : %d us\n", i, irr_driver->getGPUTimer(i).elapsedTimeus());
//Log::info("GPU Perf", "Phase %d : %d us\n", i,
// irr_driver->getGPUTimer(i).elapsedTimeus());
float elapsed = float(gpu_timers[i]);
float elapsed = float(m_gpu_times[indx*Q_LAST+i]);
core::rect<s32> pos((s32)(x_offset + (curr_val / total)*profiler_width),
(s32)(y_offset + gpu_y),
(s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width),
@ -409,32 +380,23 @@ void Profiler::draw()
if (pos.isPointInside(mouse_pos))
{
hovered_gpu_marker = (QueryPerf)i;
hovered_gpu_marker_elapsed = gpu_timers[i];
hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i];
}
if (m_capture_report)
{
if (m_first_gpu_capture_sweep)
m_gpu_capture_report_buffer->getStdStream() << GPU_Phase[i] << ";";
else
m_gpu_capture_report_buffer->getStdStream() << elapsed << ";";
}
}
if (m_capture_report)
{
m_gpu_capture_report_buffer->getStdStream() << "\n";
m_first_gpu_capture_sweep = false;
}
}
// Draw the end of the frame
{
s32 x_sync = (s32)(x_offset + factor*m_time_between_sync);
s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height);
s32 y_down_sync = (s32)( (MARGIN_Y + (2+nb_thread_infos)*LINE_HEIGHT)*screen_size.Height );
s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT)
* screen_size.Height );
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00), core::rect<s32>(x_sync, y_up_sync, x_sync + 1, y_down_sync));
GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00),
core::rect<s32>(x_sync, y_up_sync,
x_sync + 1, y_down_sync));
}
// Draw the hovered markers' names
@ -444,12 +406,13 @@ void Profiler::draw()
core::stringw text;
while(!hovered_markers.empty())
{
Marker& m = hovered_markers.top();
AllEventData::iterator j = hovered_markers.top();
const Marker &marker = j->second.getMarker(indx);
std::ostringstream oss;
oss.precision(4);
oss << m.name << " [" << (m.end - m.start) << " ms / ";
oss << j->first << " [" << (marker.getDuration()) << " ms / ";
oss.precision(3);
oss << (m.end - m.start)*100.0 / duration << "%]" << std::endl;
oss << marker.getDuration()*100.0 / duration << "%]" << std::endl;
text += oss.str().c_str();
hovered_markers.pop();
}
@ -458,19 +421,16 @@ void Profiler::draw()
if (hovered_gpu_marker != Q_LAST)
{
std::ostringstream oss;
oss << GPU_Phase[hovered_gpu_marker] << " : " << hovered_gpu_marker_elapsed << " us";
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00));
oss << GPU_Phase[hovered_gpu_marker] << " : "
<< hovered_gpu_marker_elapsed << " us";
font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS,
video::SColor(0xFF, 0xFF, 0x00, 0x00));
}
}
if (m_capture_report)
{
font->draw("Capturing profiler report...", MARKERS_NAMES_POS, video::SColor(0xFF, 0x00, 0x90, 0x00));
}
PROFILER_POP_CPU_MARKER();
#endif
}
} // draw
//-----------------------------------------------------------------------------
/// Handle freeze/unfreeze
@ -479,10 +439,11 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
video::IVideoDriver* driver = irr_driver->getVideoDriver();
const core::dimension2d<u32>& screen_size = driver->getScreenSize();
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width),
(int)(MARGIN_Y * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height));
core::rect<s32>background_rect(
(int)(MARGIN_X * screen_size.Width),
(int)(MARGIN_Y * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) );
if(!background_rect.isPointInside(mouse_pos))
return;
@ -504,7 +465,7 @@ void Profiler::onClick(const core::vector2di& mouse_pos)
// Same the other way around.
break;
}
}
} // onClick
//-----------------------------------------------------------------------------
/// Helper to draw a white background
@ -514,12 +475,80 @@ void Profiler::drawBackground()
video::IVideoDriver* driver = irr_driver->getVideoDriver();
const core::dimension2d<u32>& screen_size = driver->getScreenSize();
core::rect<s32>background_rect((int)(MARGIN_X * screen_size.Width),
(int)((MARGIN_Y + 0.25f) * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height));
core::rect<s32>background_rect(
(int)(MARGIN_X * screen_size.Width),
(int)((MARGIN_Y + 0.25f) * screen_size.Height),
(int)((1.0-MARGIN_X) * screen_size.Width),
(int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height) );
video::SColor color(0x88, 0xFF, 0xFF, 0xFF);
GL32_draw2DRectangle(color, background_rect);
#endif
}
} // drawBackground
//-----------------------------------------------------------------------------
/** Saves the collected profile data to a file. Filename is based on the
* stdout name (with -profile appended).
*/
void Profiler::writeToFile()
{
std::string base_name =
file_manager->getUserConfigFile(file_manager->getStdoutName());
// First CPU data
std::ofstream f(base_name + ".profile-cpu");
for (int thread_id = 0; thread_id < m_threads_used; thread_id++)
{
ThreadData &td = m_all_threads_data[thread_id];
AllEventData::iterator j;
std::vector<std::string> new_headings;
for (j = td.m_all_event_data.begin(); j != td.m_all_event_data.end(); j++)
{
std::vector<std::string>::iterator f =
std::find(m_all_event_names.begin(),
m_all_event_names.end(), j->first);
if(f==m_all_event_names.end())
new_headings.push_back(j->first);
}
std::sort(new_headings.begin(), new_headings.end());
f << "# \"Thread(1)\" ";
for (unsigned int i = 0; i < new_headings.size(); i++)
f << "\"" << new_headings[i] << "(" << i+2 <<")\" ";
f << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
f << "t" << thread_id << " ";
for (unsigned int i = 0; i < new_headings.size(); i++)
{
const EventData &ed = td.m_all_event_data[new_headings[i]];
f << int(ed.getMarker(start).getDuration()*1000) << " ";
} // for i i new_headings
f << std::endl;
start = (start + 1) % m_max_frames;
}
} // for
f.close();
std::ofstream f_gpu(base_name + ".profile-gpu");
f_gpu << "# ";
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << "\"" << GPU_Phase[i] << "(" << i+1 << ")\" ";
} // for i < Q_LAST
f_gpu << std::endl;
int start = m_has_wrapped_around ? m_current_frame + 1 : 0;
if (start > m_max_frames) start -= m_max_frames;
while (start != m_current_frame)
{
for (unsigned i = 0; i < Q_LAST; i++)
{
f_gpu << m_gpu_times[start*Q_LAST + i] << " ";
} // for i < Q_LAST
f_gpu << std::endl;
start = (start + 1) % m_max_frames;
}
f.close();
} // writeFile

View File

@ -18,15 +18,20 @@
#ifndef PROFILER_HPP
#define PROFILER_HPP
#include <irrlicht.h>
#include <list>
#include <vector>
#include <stack>
#include <string>
#include <streambuf>
#include <ostream>
#include <iostream>
#include "utils/synchronised.hpp"
#include <irrlicht.h>
#include <pthread.h>
#include <assert.h>
#include <iostream>
#include <list>
#include <map>
#include <ostream>
#include <stack>
#include <streambuf>
#include <string>
#include <vector>
enum QueryPerf
{
@ -70,10 +75,10 @@ double getTimeMilliseconds();
#ifdef ENABLE_PROFILER
#define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \
profiler.pushCpuMarker(name, video::SColor(0xFF, r, g, b))
profiler.pushCPUMarker(name, video::SColor(0xFF, r, g, b))
#define PROFILER_POP_CPU_MARKER() \
profiler.popCpuMarker()
profiler.popCPUMarker()
#define PROFILER_SYNC_FRAME() \
profiler.synchronizeFrame()
@ -89,84 +94,160 @@ double getTimeMilliseconds();
using namespace irr;
/** For profiling reports, we need a custom strijng stream that writes to a large
pre-allocated buffer, to avoid allocating as much as possible durign profiling */
template <typename char_type>
struct ostreambuf : public std::basic_streambuf<char_type, std::char_traits<char_type> >
{
ostreambuf(char_type* buffer, std::streamsize bufferLength)
{
// set the "put" pointer the start of the buffer and record it's length.
this->setp(buffer, buffer + bufferLength);
}
};
class StringBuffer
{
private:
char* m_buffer;
ostreambuf<char> ostreamBuffer;
std::ostream messageStream;
public:
StringBuffer(unsigned int size) : m_buffer((char*)calloc(size, 1)), ostreamBuffer(m_buffer, size), messageStream(&ostreamBuffer)
{
}
~StringBuffer()
{
free(m_buffer);
}
std::ostream& getStdStream() { return messageStream; }
char* getRawBuffer() { return m_buffer; }
};
/**
* \brief class that allows run-time graphical profiling through the use of markers
* \ingroup utils
*/
// ============================================================================
/** \brief class that allows run-time graphical profiling through the use
* of markers.
* \ingroup utils
*/
class Profiler
{
private:
struct Marker
// ------------------------------------------------------------------------
class Marker
{
double start; // Times of start and end, in milliseconds,
double end; // relatively to the time of last synchronization
size_t layer;
private:
/** An event that is started (pushed) stores the start time in this
* variable. */
double m_start;
std::string name;
video::SColor color;
/** Duration of the event in this frame (accumulated if this event
* should be recorded more than once). */
Marker(double start, double end, const char* name="N/A", const video::SColor& color=video::SColor(), size_t layer=0)
: start(start), end(end), layer(layer), name(name), color(color)
double m_duration;
/** Distance of marker from root (for nested events), used to
* adjust vertical height when drawing. */
size_t m_layer;
public:
// --------------------------------------------------------------------
Marker() { m_start = 0; m_duration = 0; m_layer = 0; }
// --------------------------------------------------------------------
Marker(double start, size_t layer=0)
: m_start(start), m_duration(0), m_layer(layer)
{
}
// --------------------------------------------------------------------
Marker(const Marker& ref)
: start(ref.start), end(ref.end), layer(ref.layer), name(ref.name), color(ref.color)
: m_start(ref.m_start), m_duration(ref.m_duration),
m_layer(ref.m_layer)
{
}
};
// --------------------------------------------------------------------
/** Returns the start time of this event marker. */
double getStart() const { return m_start; }
// --------------------------------------------------------------------
/** Returns the end time of this event marker. */
double getEnd() const { return m_start+m_duration; }
// --------------------------------------------------------------------
/** Returns the duration of this event. */
double getDuration() const { return m_duration; }
// --------------------------------------------------------------------
size_t getLayer() const { return m_layer; }
// --------------------------------------------------------------------
/** Sets start time and layer for this event. */
void setStart(double start, size_t layer = 0)
{
m_start = start; m_duration = 0; m_layer = layer;
} // setStart
// --------------------------------------------------------------------
/** Sets the end time of this event. */
void setEnd(double end)
{
m_duration += (end - m_start);
} // setEnd
typedef std::list<Marker> MarkerList;
typedef std::stack<Marker> MarkerStack;
}; // class Marker
struct ThreadInfo
// ========================================================================
/** The data for one event. It contains the events colours, all markers
* for the buffer period and a stack to detect nesting of markers.
*/
class EventData
{
MarkerList markers_done[2];
MarkerStack markers_stack[2];
};
private:
/** Colour to use in the on-screen display */
video::SColor m_colour;
typedef std::vector<ThreadInfo> ThreadInfoList;
/** Vector of all buffered markers. */
std::vector<Marker> m_all_markers;
ThreadInfoList m_thread_infos;
int m_write_id;
double m_time_last_sync;
double m_time_between_sync;
public:
EventData() {}
EventData(video::SColor colour, int max_size)
{
m_all_markers.resize(max_size);
m_colour = colour;
} // EventData
// --------------------------------------------------------------------
/** Records the start of an event for a given frame. */
void setStart(size_t frame, double start, int layer)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setStart(start, layer);
} // setStart
// --------------------------------------------------------------------
/** Records the end of an event for a given frame. */
void setEnd(size_t frame, double end)
{
assert(frame < m_all_markers.capacity());
m_all_markers[frame].setEnd(end);
} // setEnd
// --------------------------------------------------------------------
const Marker& getMarker(int n) const { return m_all_markers[n]; }
// --------------------------------------------------------------------
/** Returns the colour for this event. */
video::SColor getColour() const { return m_colour; }
// --------------------------------------------------------------------
}; // EventData
// ========================================================================
/** The mapping of event names to the corresponding EventData. */
typedef std::map<std::string, EventData> AllEventData;
// ========================================================================
struct ThreadData
{
/** Stack of events to detect nesting. */
std::vector< std::string > m_event_stack;
AllEventData m_all_event_data;
}; // class ThreadData
// ========================================================================
/** Data structure containing all currently buffered markers. The index
* is the thread id. */
std::vector< ThreadData> m_all_threads_data;
/** A mapping of thread_t pointers to a unique integer (starting from 0).*/
Synchronised< std::vector<pthread_t> > m_thread_mapping;
/** Buffer for the GPU times (in ms). */
std::vector<int> m_gpu_times;
/** Counts the threads used, i.e. registered in m_thread_mapping. */
int m_threads_used;
/** Index of the current frame in the buffer. */
int m_current_frame;
/** True if the circular buffer has wrapped around. */
bool m_has_wrapped_around;
/** The maximum number of frames to be buffered. Used to minimise
* reallocations. */
int m_max_frames;
/** Time of last sync. All start/end times are stored relative
* to this time. */
double m_time_last_sync;
/** Time between now and last sync, used to scale the GUI bar. */
double m_time_between_sync;
/** List of all event names. This list is sorted to make sure
* if the circular buffer is dumped more than once the order
* of events remains the same. */
std::vector<std::string> m_all_event_names;
// Handling freeze/unfreeze by clicking on the display
enum FreezeState
@ -179,35 +260,25 @@ private:
FreezeState m_freeze_state;
bool m_capture_report;
bool m_first_capture_sweep;
bool m_first_gpu_capture_sweep;
StringBuffer* m_capture_report_buffer;
StringBuffer* m_gpu_capture_report_buffer;
private:
int getThreadID();
void drawBackground();
public:
Profiler();
Profiler();
virtual ~Profiler();
void pushCpuMarker(const char* name="N/A", const video::SColor& color=video::SColor());
void popCpuMarker();
void synchronizeFrame();
void draw();
void onClick(const core::vector2di& mouse_pos);
bool getCaptureReport() const { return m_capture_report; }
void setCaptureReport(bool captureReport);
void pushCPUMarker(const char* name="N/A",
const video::SColor& color=video::SColor());
void popCPUMarker();
void synchronizeFrame();
void draw();
void onClick(const core::vector2di& mouse_pos);
void writeToFile();
// ------------------------------------------------------------------------
bool isFrozen() const { return m_freeze_state == FROZEN; }
protected:
// TODO: detect on which thread this is called to support multithreading
ThreadInfo& getThreadInfo() { return m_thread_infos[0]; }
void drawBackground();
};
#endif // PROFILER_HPP