diff --git a/src/config/user_config.hpp b/src/config/user_config.hpp index 3d91c18d5..954848541 100644 --- a/src/config/user_config.hpp +++ b/src/config/user_config.hpp @@ -660,6 +660,10 @@ namespace UserConfigParams /** True if graphical profiler should be displayed */ PARAM_PREFIX bool m_profiler_enabled PARAM_DEFAULT( false ); + /** How many seconds worth of data the circular profile buffer + * can store. */ + PARAM_PREFIX float m_profiler_buffer_duration PARAM_DEFAULT(20.0f); + // not saved to file // ---- Networking diff --git a/src/utils/debug.cpp b/src/utils/debug.cpp index 5311b1889..c4d475742 100644 --- a/src/utils/debug.cpp +++ b/src/utils/debug.cpp @@ -88,7 +88,7 @@ enum DebugMenuCommand DEBUG_GRAPHICS_BULLET_2, DEBUG_GRAPHICS_BOUNDING_BOXES_VIZ, DEBUG_PROFILER, - DEBUG_PROFILER_GENERATE_REPORT, + DEBUG_PROFILER_WRITE_REPORT, DEBUG_FONT_DUMP_GLYPH_PAGE, DEBUG_FONT_RELOAD, DEBUG_FPS, @@ -349,8 +349,8 @@ bool handleContextMenuAction(s32 cmd_id) UserConfigParams::m_profiler_enabled = !UserConfigParams::m_profiler_enabled; break; - case DEBUG_PROFILER_GENERATE_REPORT: - profiler.setCaptureReport(!profiler.getCaptureReport()); + case DEBUG_PROFILER_WRITE_REPORT: + profiler.writeToFile(); break; case DEBUG_THROTTLE_FPS: main_loop->setThrottleFPS(false); @@ -822,8 +822,8 @@ bool onEvent(const SEvent &event) mnu->addItem(L"Profiler", DEBUG_PROFILER); if (UserConfigParams::m_profiler_enabled) - mnu->addItem(L"Toggle capture profiler report", - DEBUG_PROFILER_GENERATE_REPORT); + mnu->addItem(L"Save profiler report", + DEBUG_PROFILER_WRITE_REPORT); mnu->addItem(L"Do not limit FPS", DEBUG_THROTTLE_FPS); mnu->addItem(L"Toggle FPS", DEBUG_FPS); mnu->addItem(L"Save replay", DEBUG_SAVE_REPLAY); diff --git a/src/utils/profiler.cpp b/src/utils/profiler.cpp index b359cdcf6..9433ac4d8 100644 --- a/src/utils/profiler.cpp +++ b/src/utils/profiler.cpp @@ -16,6 +16,8 @@ // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "profiler.hpp" + +#include "config/user_config.hpp" #include "graphics/glwrap.hpp" #include "graphics/irr_driver.hpp" #include "graphics/2dutils.hpp" @@ -26,11 +28,11 @@ #include "io/file_manager.hpp" #include "utils/vs.hpp" -#include -#include -#include #include #include +#include +#include +#include static const char* GPU_Phase[Q_LAST] = { @@ -74,7 +76,8 @@ Profiler profiler; #define MARKERS_NAMES_POS core::rect(50,100,150,200) #define GPU_MARKERS_NAMES_POS core::rect(50,165,150,250) -#define TIME_DRAWN_MS 30.0f // the width of the profiler corresponds to TIME_DRAWN_MS milliseconds +// The width of the profiler corresponds to TIME_DRAWN_MS milliseconds +#define TIME_DRAWN_MS 30.0f // --- Begin portable precise timer --- #ifdef WIN32 @@ -90,7 +93,7 @@ Profiler profiler; LARGE_INTEGER timer; QueryPerformanceCounter(&timer); return double(timer.QuadPart) / perFreq; - } + } // getTimeMilliseconds #else #include @@ -99,160 +102,152 @@ Profiler profiler; struct timeval tv; gettimeofday(&tv, NULL); return double(tv.tv_sec * 1000) + (double(tv.tv_usec) / 1000.0); - } + } // getTimeMilliseconds #endif // --- End portable precise timer --- //----------------------------------------------------------------------------- Profiler::Profiler() { - m_thread_infos.resize(1); // TODO: monothread now, should support multithreading - m_write_id = 0; - m_time_last_sync = getTimeMilliseconds(); - m_time_between_sync = 0.0; - m_freeze_state = UNFROZEN; - m_capture_report = false; - m_first_capture_sweep = true; - m_first_gpu_capture_sweep = true; - m_capture_report_buffer = NULL; -} + m_time_last_sync = getTimeMilliseconds(); + m_time_between_sync = 0.0; + m_freeze_state = UNFROZEN; + + // By limiting the number of threads that can be created, we avoid the + // problem that all access to m_all_event_data need to be locked + // (otherwise adding a thread to m_all_event_data can trigger a + // reallocate, which makes concurrent access invalid) + m_max_frames = int( UserConfigParams::m_profiler_buffer_duration + * UserConfigParams::m_max_fps ); + m_current_frame = 0; + m_has_wrapped_around = false; + m_threads_used = 0; + const int MAX_THREADS = 10; + m_all_threads_data.resize(MAX_THREADS); + m_thread_mapping.getData().resize(MAX_THREADS); + m_gpu_times.resize(Q_LAST*m_max_frames); +} // Profile //----------------------------------------------------------------------------- Profiler::~Profiler() { -} +} // ~Profiler //----------------------------------------------------------------------------- - -void Profiler::setCaptureReport(bool captureReport) +int Profiler::getThreadID() { - if (!m_capture_report && captureReport) + m_thread_mapping.lock(); + pthread_t thread = pthread_self(); + int i = 0; + while(i < m_threads_used) { - m_capture_report = true; - m_first_capture_sweep = true; - m_first_gpu_capture_sweep = true; - // TODO: a 20 MB hardcoded buffer for now. That should amply suffice for - // all reasonable purposes. But it's not too clean to hardcode - m_capture_report_buffer = new StringBuffer(20 * 1024 * 1024); - m_gpu_capture_report_buffer = new StringBuffer(20 * 1024 * 1024); - } - else if (m_capture_report && !captureReport) - { - // when disabling capture to file, flush captured data to a file + if (memcmp( &m_thread_mapping.getData()[i], + &thread, + sizeof(thread)) ==0 ) { - std::ofstream filewriter(file_manager->getUserConfigFile("profiling.csv").c_str(), std::ios::out | std::ios::binary); - const char* str = m_capture_report_buffer->getRawBuffer(); - filewriter.write(str, strlen(str)); - } - { - std::ofstream filewriter(file_manager->getUserConfigFile("profiling_gpu.csv").c_str(), std::ios::out | std::ios::binary); - const char* str = m_gpu_capture_report_buffer->getRawBuffer(); - filewriter.write(str, strlen(str)); + m_thread_mapping.unlock(); + return i; } + i++; + } // for i second.setStart(m_current_frame, start, td.m_event_stack.size()); + } + else + { + EventData ed(colour, m_max_frames); + ed.setStart(m_current_frame, start, td.m_event_stack.size()); + td.m_all_event_data[name] = ed; + } + + td.m_event_stack.push_back(name); +} // pushCPUMarker //----------------------------------------------------------------------------- /// Stop the last pushed marker -void Profiler::popCpuMarker() +void Profiler::popCPUMarker() { // Don't do anything when frozen if(m_freeze_state == FROZEN || m_freeze_state == WAITING_FOR_UNFREEZE) return; - ThreadInfo& ti = getThreadInfo(); - assert(ti.markers_stack[m_write_id].size() > 0); + int thread_id = getThreadID(); + ThreadData &td = m_all_threads_data[thread_id]; - MarkerStack& markers_stack = ti.markers_stack[m_write_id]; - MarkerList& markers_done = ti.markers_done[m_write_id]; + assert(td.m_event_stack.size() > 0); - // Update the date of end of the marker - Marker& marker = markers_stack.top(); - marker.end = getTimeMilliseconds() - m_time_last_sync; + const std::string &name = td.m_event_stack.back(); + td.m_all_event_data[name].setEnd(m_current_frame, + getTimeMilliseconds() - m_time_last_sync); - // Remove the marker from the stack and add it to the list of markers done - markers_done.push_front(marker); - markers_stack.pop(); -} + td.m_event_stack.pop_back(); +} // popCPUMarker //----------------------------------------------------------------------------- -/// Swap buffering for the markers +/** Saves all data for the current frame, and starts the next frame in the + * circular buffer. Any events that are currently active (e.g. in a separate + * thread) will be split in two parts: the beginning (till now) in the current + * frame, the rest will be added to the next frame. + */ void Profiler::synchronizeFrame() { // Don't do anything when frozen if(m_freeze_state == FROZEN) return; - // Avoid using several times getTimeMilliseconds(), which would yield different results + // Avoid using several times getTimeMilliseconds(), + // which would yield different results double now = getTimeMilliseconds(); - // Swap buffers - int old_write_id = m_write_id; - m_write_id = !m_write_id; - - // For each thread: - ThreadInfoList::iterator it_end = m_thread_infos.end(); - for(ThreadInfoList::iterator it = m_thread_infos.begin() ; it != it_end ; it++) + // Set index to next frame + int next_frame = m_current_frame+1; + if (next_frame >= m_max_frames) { - // Get the thread information - ThreadInfo& ti = *it; - - MarkerList& old_markers_done = ti.markers_done[old_write_id]; - MarkerStack& old_markers_stack = ti.markers_stack[old_write_id]; - - MarkerList& new_markers_done = ti.markers_done[m_write_id]; - MarkerStack& new_markers_stack = ti.markers_stack[m_write_id]; - - // Clear the containers for the new frame - new_markers_done.clear(); - while(!new_markers_stack.empty()) - new_markers_stack.pop(); - - // Finish the markers in the stack of the previous frame - // and start them for the next frame. - - // For each marker in the old stack: - while(!old_markers_stack.empty()) - { - // - finish the marker for the previous frame and add it to the old "done" list - Marker& m = old_markers_stack.top(); - m.end = now - m_time_last_sync; - old_markers_done.push_front(m); - - // - start a new one for the new frame - Marker new_marker(0.0, -1.0, m.name.c_str(), m.color); - new_markers_stack.push(new_marker); - - // - next iteration - old_markers_stack.pop(); - } + next_frame = 0; + m_has_wrapped_around = true; } + // First finish all markers that are currently in progress, and add + // a new start marker for the next frame. So e.g. if a thread is busy in + // one event while the main thread syncs the frame, this event will get + // split into two parts in two consecutive frames + for (int i = 0; i < m_threads_used; i++) + { + ThreadData &td = m_all_threads_data[i]; + for(unsigned int j=0; jgetVideoDriver(); - std::stack hovered_markers; + + // Current frame points to the frame in which currently data is + // being accumulated. Draw the previous (i.e. complete) frame. + int indx = m_current_frame - 1; + if (indx < 0) indx = m_max_frames - 1; drawBackground(); // Force to show the pointer irr_driver->showPointer(); - int read_id = !m_write_id; - - // Compute some values for drawing (unit: pixels, but we keep floats for reducing errors accumulation) - core::dimension2d screen_size = driver->getScreenSize(); + // Compute some values for drawing (unit: pixels, but we keep floats + // for reducing errors accumulation) + core::dimension2d screen_size = driver->getScreenSize(); const double profiler_width = (1.0 - 2.0*MARGIN_X) * screen_size.Width; - const double x_offset = MARGIN_X*screen_size.Width; - const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height; - const double line_height = LINE_HEIGHT*screen_size.Height; - - size_t nb_thread_infos = m_thread_infos.size(); + const double x_offset = MARGIN_X*screen_size.Width; + const double y_offset = (MARGIN_Y + LINE_HEIGHT)*screen_size.Height; + const double line_height = LINE_HEIGHT*screen_size.Height; - double start = -1.0f; - double end = -1.0f; - for (size_t i = 0; i < nb_thread_infos; i++) + // Compute start end end time for this frame + double start = 99999.0f; + double end = -1.0f; + for (int i = 0; i < m_threads_used; i++) { - MarkerList& markers = m_thread_infos[i].markers_done[read_id]; - - MarkerList::const_iterator it_end = markers.end(); - for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++) + AllEventData &aed = m_all_threads_data[i].m_all_event_data; + AllEventData::iterator j; + for (j = aed.begin(); j != aed.end(); ++j) { - const Marker& m = *it; + const Marker &marker = j->second.getMarker(indx); + start = std::min(start, marker.getStart() ); + end = std::max(end, marker.getEnd() ); + } // for j in events + } // for i in threads - if (start < 0.0) start = m.start; - else start = std::min(start, m.start); - - if (end < 0.0) end = m.end; - else end = std::max(end, m.end); - } - } const double duration = end - start; const double factor = profiler_width / duration; @@ -315,70 +308,47 @@ void Profiler::draw() // Get the mouse pos core::vector2di mouse_pos = GUIEngine::EventHandler::get()->getMousePos(); - // For each thread: - for (size_t i = 0; i < nb_thread_infos; i++) + std::stack hovered_markers; + for (int i = 0; i < m_threads_used; i++) { - // Draw all markers - MarkerList& markers = m_thread_infos[i].markers_done[read_id]; - - if (markers.empty()) - continue; - - if (m_capture_report) + ThreadData &td = m_all_threads_data[i]; + AllEventData &aed = td.m_all_event_data; + AllEventData::iterator j; + for (j = aed.begin(); j != aed.end(); ++j) { - if (m_first_capture_sweep) - m_capture_report_buffer->getStdStream() << "\"Thread\";"; - else - m_capture_report_buffer->getStdStream() << i << ";"; - } - MarkerList::const_iterator it_end = markers.end(); - for (MarkerList::const_iterator it = markers.begin(); it != it_end; it++) - { - const Marker& m = *it; - assert(m.end >= 0.0); - - if (m_capture_report) - { - if (m_first_capture_sweep) - m_capture_report_buffer->getStdStream() << "\"" << m.name << "\";"; - else - m_capture_report_buffer->getStdStream() << (int)round((m.end - m.start) * 1000) << ";"; - } - core::rect pos((s32)( x_offset + factor*m.start ), - (s32)( y_offset + i*line_height ), - (s32)( x_offset + factor*m.end ), - (s32)( y_offset + (i+1)*line_height )); + const Marker &marker = j->second.getMarker(indx); + core::rect pos((s32)(x_offset + factor*marker.getStart()), + (s32)(y_offset + i*line_height), + (s32)(x_offset + factor*marker.getEnd()), + (s32)(y_offset + (i + 1)*line_height) ); // Reduce vertically the size of the markers according to their layer - pos.UpperLeftCorner.Y += m.layer*2; - pos.LowerRightCorner.Y -= m.layer*2; - - GL32_draw2DRectangle(m.color, pos); + pos.UpperLeftCorner.Y += 2 * marker.getLayer(); + pos.LowerRightCorner.Y -= 2 * marker.getLayer(); + GL32_draw2DRectangle(j->second.getColour(), pos); // If the mouse cursor is over the marker, get its information - if(pos.isPointInside(mouse_pos)) - hovered_markers.push(m); - } + if (pos.isPointInside(mouse_pos)) + { + hovered_markers.push(j); + } + + } // for j in AllEventdata + } // for i in threads - if (m_capture_report) - { - m_capture_report_buffer->getStdStream() << "\n"; - m_first_capture_sweep = false; - } - } // GPU profiler QueryPerf hovered_gpu_marker = Q_LAST; long hovered_gpu_marker_elapsed = 0; - int gpu_y = int(y_offset + nb_thread_infos*line_height + line_height/2); + int gpu_y = int(y_offset + m_threads_used*line_height + line_height/2); float total = 0; - unsigned int gpu_timers[Q_LAST]; for (unsigned i = 0; i < Q_LAST; i++) { #ifndef SERVER_ONLY - gpu_timers[i] = irr_driver->getGPUTimer(i).elapsedTimeus(); + int n = irr_driver->getGPUTimer(i).elapsedTimeus(); + m_gpu_times[indx*Q_LAST + i] = n; + total += n; #endif - total += gpu_timers[i]; } static video::SColor colors[] = { @@ -395,9 +365,10 @@ void Profiler::draw() float curr_val = 0; for (unsigned i = 0; i < Q_LAST; i++) { - //Log::info("GPU Perf", "Phase %d : %d us\n", i, irr_driver->getGPUTimer(i).elapsedTimeus()); + //Log::info("GPU Perf", "Phase %d : %d us\n", i, + // irr_driver->getGPUTimer(i).elapsedTimeus()); - float elapsed = float(gpu_timers[i]); + float elapsed = float(m_gpu_times[indx*Q_LAST+i]); core::rect pos((s32)(x_offset + (curr_val / total)*profiler_width), (s32)(y_offset + gpu_y), (s32)(x_offset + ((curr_val + elapsed) / total)*profiler_width), @@ -409,32 +380,23 @@ void Profiler::draw() if (pos.isPointInside(mouse_pos)) { hovered_gpu_marker = (QueryPerf)i; - hovered_gpu_marker_elapsed = gpu_timers[i]; + hovered_gpu_marker_elapsed = m_gpu_times[indx*Q_LAST+i]; } - if (m_capture_report) - { - if (m_first_gpu_capture_sweep) - m_gpu_capture_report_buffer->getStdStream() << GPU_Phase[i] << ";"; - else - m_gpu_capture_report_buffer->getStdStream() << elapsed << ";"; - } } - if (m_capture_report) - { - m_gpu_capture_report_buffer->getStdStream() << "\n"; - m_first_gpu_capture_sweep = false; - } } // Draw the end of the frame { s32 x_sync = (s32)(x_offset + factor*m_time_between_sync); s32 y_up_sync = (s32)(MARGIN_Y*screen_size.Height); - s32 y_down_sync = (s32)( (MARGIN_Y + (2+nb_thread_infos)*LINE_HEIGHT)*screen_size.Height ); + s32 y_down_sync = (s32)( (MARGIN_Y + (2+m_threads_used)*LINE_HEIGHT) + * screen_size.Height ); - GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00), core::rect(x_sync, y_up_sync, x_sync + 1, y_down_sync)); + GL32_draw2DRectangle(video::SColor(0xFF, 0x00, 0x00, 0x00), + core::rect(x_sync, y_up_sync, + x_sync + 1, y_down_sync)); } // Draw the hovered markers' names @@ -444,12 +406,13 @@ void Profiler::draw() core::stringw text; while(!hovered_markers.empty()) { - Marker& m = hovered_markers.top(); + AllEventData::iterator j = hovered_markers.top(); + const Marker &marker = j->second.getMarker(indx); std::ostringstream oss; oss.precision(4); - oss << m.name << " [" << (m.end - m.start) << " ms / "; + oss << j->first << " [" << (marker.getDuration()) << " ms / "; oss.precision(3); - oss << (m.end - m.start)*100.0 / duration << "%]" << std::endl; + oss << marker.getDuration()*100.0 / duration << "%]" << std::endl; text += oss.str().c_str(); hovered_markers.pop(); } @@ -458,19 +421,16 @@ void Profiler::draw() if (hovered_gpu_marker != Q_LAST) { std::ostringstream oss; - oss << GPU_Phase[hovered_gpu_marker] << " : " << hovered_gpu_marker_elapsed << " us"; - font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS, video::SColor(0xFF, 0xFF, 0x00, 0x00)); + oss << GPU_Phase[hovered_gpu_marker] << " : " + << hovered_gpu_marker_elapsed << " us"; + font->draw(oss.str().c_str(), GPU_MARKERS_NAMES_POS, + video::SColor(0xFF, 0xFF, 0x00, 0x00)); } } - if (m_capture_report) - { - font->draw("Capturing profiler report...", MARKERS_NAMES_POS, video::SColor(0xFF, 0x00, 0x90, 0x00)); - } - PROFILER_POP_CPU_MARKER(); #endif -} +} // draw //----------------------------------------------------------------------------- /// Handle freeze/unfreeze @@ -479,10 +439,11 @@ void Profiler::onClick(const core::vector2di& mouse_pos) video::IVideoDriver* driver = irr_driver->getVideoDriver(); const core::dimension2d& screen_size = driver->getScreenSize(); - core::rectbackground_rect((int)(MARGIN_X * screen_size.Width), - (int)(MARGIN_Y * screen_size.Height), - (int)((1.0-MARGIN_X) * screen_size.Width), - (int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height)); + core::rectbackground_rect( + (int)(MARGIN_X * screen_size.Width), + (int)(MARGIN_Y * screen_size.Height), + (int)((1.0-MARGIN_X) * screen_size.Width), + (int)((MARGIN_Y + 3.0f*LINE_HEIGHT) * screen_size.Height) ); if(!background_rect.isPointInside(mouse_pos)) return; @@ -504,7 +465,7 @@ void Profiler::onClick(const core::vector2di& mouse_pos) // Same the other way around. break; } -} +} // onClick //----------------------------------------------------------------------------- /// Helper to draw a white background @@ -514,12 +475,80 @@ void Profiler::drawBackground() video::IVideoDriver* driver = irr_driver->getVideoDriver(); const core::dimension2d& screen_size = driver->getScreenSize(); - core::rectbackground_rect((int)(MARGIN_X * screen_size.Width), - (int)((MARGIN_Y + 0.25f) * screen_size.Height), - (int)((1.0-MARGIN_X) * screen_size.Width), - (int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height)); + core::rectbackground_rect( + (int)(MARGIN_X * screen_size.Width), + (int)((MARGIN_Y + 0.25f) * screen_size.Height), + (int)((1.0-MARGIN_X) * screen_size.Width), + (int)((MARGIN_Y + 1.75f*LINE_HEIGHT) * screen_size.Height) ); video::SColor color(0x88, 0xFF, 0xFF, 0xFF); GL32_draw2DRectangle(color, background_rect); #endif -} +} // drawBackground + +//----------------------------------------------------------------------------- +/** Saves the collected profile data to a file. Filename is based on the + * stdout name (with -profile appended). + */ +void Profiler::writeToFile() +{ + std::string base_name = + file_manager->getUserConfigFile(file_manager->getStdoutName()); + // First CPU data + std::ofstream f(base_name + ".profile-cpu"); + for (int thread_id = 0; thread_id < m_threads_used; thread_id++) + { + ThreadData &td = m_all_threads_data[thread_id]; + AllEventData::iterator j; + std::vector new_headings; + for (j = td.m_all_event_data.begin(); j != td.m_all_event_data.end(); j++) + { + std::vector::iterator f = + std::find(m_all_event_names.begin(), + m_all_event_names.end(), j->first); + if(f==m_all_event_names.end()) + new_headings.push_back(j->first); + } + std::sort(new_headings.begin(), new_headings.end()); + f << "# \"Thread(1)\" "; + for (unsigned int i = 0; i < new_headings.size(); i++) + f << "\"" << new_headings[i] << "(" << i+2 <<")\" "; + f << std::endl; + int start = m_has_wrapped_around ? m_current_frame + 1 : 0; + if (start > m_max_frames) start -= m_max_frames; + while (start != m_current_frame) + { + f << "t" << thread_id << " "; + for (unsigned int i = 0; i < new_headings.size(); i++) + { + const EventData &ed = td.m_all_event_data[new_headings[i]]; + f << int(ed.getMarker(start).getDuration()*1000) << " "; + } // for i i new_headings + f << std::endl; + start = (start + 1) % m_max_frames; + } + } // for + f.close(); + + std::ofstream f_gpu(base_name + ".profile-gpu"); + f_gpu << "# "; + + for (unsigned i = 0; i < Q_LAST; i++) + { + f_gpu << "\"" << GPU_Phase[i] << "(" << i+1 << ")\" "; + } // for i < Q_LAST + f_gpu << std::endl; + + int start = m_has_wrapped_around ? m_current_frame + 1 : 0; + if (start > m_max_frames) start -= m_max_frames; + while (start != m_current_frame) + { + for (unsigned i = 0; i < Q_LAST; i++) + { + f_gpu << m_gpu_times[start*Q_LAST + i] << " "; + } // for i < Q_LAST + f_gpu << std::endl; + start = (start + 1) % m_max_frames; + } + f.close(); +} // writeFile diff --git a/src/utils/profiler.hpp b/src/utils/profiler.hpp index 6d8e0a622..58cf02e39 100644 --- a/src/utils/profiler.hpp +++ b/src/utils/profiler.hpp @@ -18,15 +18,20 @@ #ifndef PROFILER_HPP #define PROFILER_HPP -#include -#include -#include -#include -#include -#include -#include -#include +#include "utils/synchronised.hpp" +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include enum QueryPerf { @@ -70,10 +75,10 @@ double getTimeMilliseconds(); #ifdef ENABLE_PROFILER #define PROFILER_PUSH_CPU_MARKER(name, r, g, b) \ - profiler.pushCpuMarker(name, video::SColor(0xFF, r, g, b)) + profiler.pushCPUMarker(name, video::SColor(0xFF, r, g, b)) #define PROFILER_POP_CPU_MARKER() \ - profiler.popCpuMarker() + profiler.popCPUMarker() #define PROFILER_SYNC_FRAME() \ profiler.synchronizeFrame() @@ -89,84 +94,160 @@ double getTimeMilliseconds(); using namespace irr; -/** For profiling reports, we need a custom strijng stream that writes to a large - pre-allocated buffer, to avoid allocating as much as possible durign profiling */ -template -struct ostreambuf : public std::basic_streambuf > -{ - ostreambuf(char_type* buffer, std::streamsize bufferLength) - { - // set the "put" pointer the start of the buffer and record it's length. - this->setp(buffer, buffer + bufferLength); - } -}; - - -class StringBuffer -{ -private: - char* m_buffer; - ostreambuf ostreamBuffer; - std::ostream messageStream; - -public: - - StringBuffer(unsigned int size) : m_buffer((char*)calloc(size, 1)), ostreamBuffer(m_buffer, size), messageStream(&ostreamBuffer) - { - } - - ~StringBuffer() - { - free(m_buffer); - } - - std::ostream& getStdStream() { return messageStream; } - - char* getRawBuffer() { return m_buffer; } -}; - -/** - * \brief class that allows run-time graphical profiling through the use of markers - * \ingroup utils - */ +// ============================================================================ +/** \brief class that allows run-time graphical profiling through the use + * of markers. + * \ingroup utils + */ class Profiler { private: - struct Marker + // ------------------------------------------------------------------------ + class Marker { - double start; // Times of start and end, in milliseconds, - double end; // relatively to the time of last synchronization - size_t layer; + private: + /** An event that is started (pushed) stores the start time in this + * variable. */ + double m_start; - std::string name; - video::SColor color; + /** Duration of the event in this frame (accumulated if this event + * should be recorded more than once). */ - Marker(double start, double end, const char* name="N/A", const video::SColor& color=video::SColor(), size_t layer=0) - : start(start), end(end), layer(layer), name(name), color(color) + double m_duration; + /** Distance of marker from root (for nested events), used to + * adjust vertical height when drawing. */ + size_t m_layer; + public: + // -------------------------------------------------------------------- + Marker() { m_start = 0; m_duration = 0; m_layer = 0; } + + // -------------------------------------------------------------------- + Marker(double start, size_t layer=0) + : m_start(start), m_duration(0), m_layer(layer) { } - + // -------------------------------------------------------------------- Marker(const Marker& ref) - : start(ref.start), end(ref.end), layer(ref.layer), name(ref.name), color(ref.color) + : m_start(ref.m_start), m_duration(ref.m_duration), + m_layer(ref.m_layer) { } - }; + // -------------------------------------------------------------------- + /** Returns the start time of this event marker. */ + double getStart() const { return m_start; } + // -------------------------------------------------------------------- + /** Returns the end time of this event marker. */ + double getEnd() const { return m_start+m_duration; } + // -------------------------------------------------------------------- + /** Returns the duration of this event. */ + double getDuration() const { return m_duration; } + // -------------------------------------------------------------------- + size_t getLayer() const { return m_layer; } + // -------------------------------------------------------------------- + /** Sets start time and layer for this event. */ + void setStart(double start, size_t layer = 0) + { + m_start = start; m_duration = 0; m_layer = layer; + } // setStart + // -------------------------------------------------------------------- + /** Sets the end time of this event. */ + void setEnd(double end) + { + m_duration += (end - m_start); + } // setEnd - typedef std::list MarkerList; - typedef std::stack MarkerStack; + }; // class Marker - struct ThreadInfo + // ======================================================================== + /** The data for one event. It contains the events colours, all markers + * for the buffer period and a stack to detect nesting of markers. + */ + class EventData { - MarkerList markers_done[2]; - MarkerStack markers_stack[2]; - }; + private: + /** Colour to use in the on-screen display */ + video::SColor m_colour; - typedef std::vector ThreadInfoList; + /** Vector of all buffered markers. */ + std::vector m_all_markers; - ThreadInfoList m_thread_infos; - int m_write_id; - double m_time_last_sync; - double m_time_between_sync; + public: + EventData() {} + EventData(video::SColor colour, int max_size) + { + m_all_markers.resize(max_size); + m_colour = colour; + } // EventData + // -------------------------------------------------------------------- + /** Records the start of an event for a given frame. */ + void setStart(size_t frame, double start, int layer) + { + assert(frame < m_all_markers.capacity()); + m_all_markers[frame].setStart(start, layer); + } // setStart + // -------------------------------------------------------------------- + /** Records the end of an event for a given frame. */ + void setEnd(size_t frame, double end) + { + assert(frame < m_all_markers.capacity()); + m_all_markers[frame].setEnd(end); + } // setEnd + // -------------------------------------------------------------------- + const Marker& getMarker(int n) const { return m_all_markers[n]; } + // -------------------------------------------------------------------- + /** Returns the colour for this event. */ + video::SColor getColour() const { return m_colour; } + // -------------------------------------------------------------------- + }; // EventData + + // ======================================================================== + /** The mapping of event names to the corresponding EventData. */ + typedef std::map AllEventData; + // ======================================================================== + struct ThreadData + { + /** Stack of events to detect nesting. */ + std::vector< std::string > m_event_stack; + + AllEventData m_all_event_data; + }; // class ThreadData + + // ======================================================================== + + /** Data structure containing all currently buffered markers. The index + * is the thread id. */ + std::vector< ThreadData> m_all_threads_data; + + /** A mapping of thread_t pointers to a unique integer (starting from 0).*/ + Synchronised< std::vector > m_thread_mapping; + + /** Buffer for the GPU times (in ms). */ + std::vector m_gpu_times; + + /** Counts the threads used, i.e. registered in m_thread_mapping. */ + int m_threads_used; + + /** Index of the current frame in the buffer. */ + int m_current_frame; + + /** True if the circular buffer has wrapped around. */ + bool m_has_wrapped_around; + + /** The maximum number of frames to be buffered. Used to minimise + * reallocations. */ + int m_max_frames; + + /** Time of last sync. All start/end times are stored relative + * to this time. */ + double m_time_last_sync; + + /** Time between now and last sync, used to scale the GUI bar. */ + double m_time_between_sync; + + /** List of all event names. This list is sorted to make sure + * if the circular buffer is dumped more than once the order + * of events remains the same. */ + std::vector m_all_event_names; // Handling freeze/unfreeze by clicking on the display enum FreezeState @@ -179,35 +260,25 @@ private: FreezeState m_freeze_state; - bool m_capture_report; - bool m_first_capture_sweep; - bool m_first_gpu_capture_sweep; - StringBuffer* m_capture_report_buffer; - StringBuffer* m_gpu_capture_report_buffer; +private: + int getThreadID(); + void drawBackground(); public: - Profiler(); + Profiler(); virtual ~Profiler(); - void pushCpuMarker(const char* name="N/A", const video::SColor& color=video::SColor()); - void popCpuMarker(); - void synchronizeFrame(); - - void draw(); - - void onClick(const core::vector2di& mouse_pos); - - bool getCaptureReport() const { return m_capture_report; } - void setCaptureReport(bool captureReport); + void pushCPUMarker(const char* name="N/A", + const video::SColor& color=video::SColor()); + void popCPUMarker(); + void synchronizeFrame(); + void draw(); + void onClick(const core::vector2di& mouse_pos); + void writeToFile(); + // ------------------------------------------------------------------------ bool isFrozen() const { return m_freeze_state == FROZEN; } -protected: - // TODO: detect on which thread this is called to support multithreading - ThreadInfo& getThreadInfo() { return m_thread_infos[0]; } - void drawBackground(); - - }; #endif // PROFILER_HPP