From 792c1e0926b1c64db2a702a43374b616376275f1 Mon Sep 17 00:00:00 2001 From: Alayan <25536748+Alayan-stk-2@users.noreply.github.com> Date: Fri, 26 Apr 2024 16:11:55 +0200 Subject: [PATCH] Compute new performance metrics and improve saving of profiler data * Format the standard CPU and GPU profiler data as CSV, to simplify its analysis * Include the internal identifier name of the track in the report name, or 'menu' if there is no active track. This uses the current track at the time of report saving. It is possible to keep the profiler on through multiple tracks and menus, but it's on people enabling the artist debug mode and the profiler to use it sensibly * Stop the profiler when saving data, if it's not already stopped * Compute, when profiling is stopped, for all integer FPS values up to 1000, three key metrics: - The number of frames too slow to reach this FPS value - The proportion of total time spent in frames slower than this FPS value - The proportion of total time spent in frames beyond the maximum duration associated with this FPS value * Compute three key indicator values: - Steady FPS (highest FPS with no more than 1% time spent in slower frames and no more than 0.1% time spent beyond the maximum duration) - Mostly stable FPS (highest FPS with no more than 12% time spent in slower frames and no more than 2% time spent beyond the maximum duration) - Typical FPS (highest FPS with no more than 50% time spent in slower frames and no more than 10% time spent beyond the maximum duration) * Save an additional 'perf-report' containing this new data Traditional performance measurements, such as average FPS and 1% lows (which are typically based on the 1% slowest frames, not on the 1% slowest gameplay time), are flawed because they unduly reward outputting more fast frames. Increasing the number of fast frames without speeding up the slow frames is useless when it comes to making the game feel smooth, which is especially critical in a racing game. Missing features: - Displaying a summary of the performance test instead of the normal end screen - Confirm to the user that the report has been successfully saved - And more advanced options. --- src/modes/world_status.cpp | 2 +- src/utils/profiler.cpp | 140 ++++++++++++++++++++++++++++++++++--- src/utils/profiler.hpp | 31 ++++++++ 3 files changed, 163 insertions(+), 10 deletions(-) diff --git a/src/modes/world_status.cpp b/src/modes/world_status.cpp index 761f226c7..711697a0b 100644 --- a/src/modes/world_status.cpp +++ b/src/modes/world_status.cpp @@ -356,7 +356,7 @@ void WorldStatus::updateTime(int ticks) if(RaceManager::get()->isBenchmarking()) { // The profiler drawings cost performance - //profiler.setDrawing(false); + profiler.setDrawing(false); profiler.activate(); } diff --git a/src/utils/profiler.cpp b/src/utils/profiler.cpp index 3ee5cd46b..2990c85f7 100644 --- a/src/utils/profiler.cpp +++ b/src/utils/profiler.cpp @@ -26,6 +26,7 @@ #include "graphics/irr_driver.hpp" #include "guiengine/scalable_font.hpp" #include "io/file_manager.hpp" +#include "tracks/track.hpp" #include "utils/file_utils.hpp" #include "utils/string_utils.hpp" #include "utils/tls.hpp" @@ -244,11 +245,12 @@ void Profiler::activate() } // activate //----------------------------------------------------------------------------- -/** Switches the profiler off. +/** Switches the profiler off and computes frametime analysis. */ void Profiler::desactivate() { UserConfigParams::m_profiler_enabled = false; + computeStableFPS(); } // desactivate //----------------------------------------------------------------------------- @@ -566,24 +568,141 @@ void Profiler::drawBackground() #endif } // drawBackground +//----------------------------------------------------------------------------- +/** To be called once the data for a report has been collected, + * computes the average FPS, and for each integer FPS value from 1 to 1000, + * the proportion of frames fast enough for this FPS value, + * the proportion of time spent in frames fast enough for this FPS value, + * as well as the proportion of total benchmark time spent waiting for a frame + * beyond the normal duration of a frame at this FPS value. + */ +void Profiler::computeStableFPS() +{ + // TODO: Here we only check frametimes for the 1st CPU thread. + // This is not problematic if there is only one thread + // as seems to be default, but does it necessarily produce + // correct frametimes if there are multiples? + // If not, then additional code to identify the correct + // frametime values is needed. Once the vector of frametimes + // is produced, the rest of the computations are unchanged. + m_lock.lock(); + + ThreadData &td = m_all_threads_data[0]; + int start = m_has_wrapped_around ? m_current_frame + 1 : 0; + if (start > m_max_frames) start -= m_max_frames; + // Remove any old data if needed + m_frame_times.clear(); + m_total_frametime = 0; + + while (start != m_current_frame) + { + // The overall duration of a frame is recorded in the outermost + // marker event, which is always the 1st event in the list. + const EventData &ed = td.m_all_event_data[td.m_ordered_headings[0]]; + int frame_microseconds = int(ed.getMarker(start).getDuration()*1000); + // A spurious instant frame is recorded at the beginning of the recording + if (frame_microseconds > 0) + m_frame_times.push_back(frame_microseconds); + m_total_frametime += frame_microseconds; + + start = (start + 1) % m_max_frames; + } // while start != m_current_frame + + m_total_frames = m_frame_times.size(); + + // Sort from biggest to smallest with the reverse iterators + std::sort(m_frame_times.rbegin(), m_frame_times.rend()); + + m_fps_metrics_high = m_fps_metrics_mid = m_fps_metrics_low = 0; + + for (int i=1; i<=MAX_ANALYZED_FPS; i++) + { + int max_microseconds = 1000000/i; // The rounding errors are acceptable + m_time_spent_in_slow_frames[i-1] = 0; + m_time_waited_in_slow_frames[i-1] = 0; + m_slow_frames[i-1] = m_frame_times.size(); // Will be updated if lower + + for (uint j=0; j max_microseconds) + { + m_time_spent_in_slow_frames[i-1] += m_frame_times[j]; + m_time_waited_in_slow_frames[i-1] += (m_frame_times[j] - max_microseconds); + } + // As frames are sorted, we can skip all remaining frames + else + { + m_slow_frames[i-1] = j; + break; + } + } + + float ratio_time_spent = float(m_time_spent_in_slow_frames[i-1]) / float(m_total_frametime); + float ratio_time_waited = float(m_time_waited_in_slow_frames[i-1]) / float(m_total_frametime); + + if (ratio_time_spent < 0.5f && ratio_time_waited < 0.1f) + m_fps_metrics_high = i; + if (ratio_time_spent < 0.12f && ratio_time_waited < 0.02f) + m_fps_metrics_mid = i; + if (ratio_time_spent < 0.01f && ratio_time_waited < 0.001f) + m_fps_metrics_low = i; + } + + Log::info("Profiler", "Frame count '%i', Time (ms) '%i', Steady FPS '%i', Mostly stable FPS '%i', Typical FPS '%i'", m_total_frames, m_total_frametime/1000, m_fps_metrics_low, m_fps_metrics_mid, m_fps_metrics_high); + + m_lock.unlock(); +} // computeStableFPS + //----------------------------------------------------------------------------- /** Saves the collected profile data to a file. Filename is based on the * stdout name (with -profile appended). */ void Profiler::writeToFile() { + // Turn the profiler off, and ensure overall performance metrics are computed + if (UserConfigParams::m_profiler_enabled) + desactivate(); + m_lock.lock(); std::string base_name = file_manager->getUserConfigFile(file_manager->getStdoutName()); - // First CPU data + + // 1: Save overall performance metrics + std::ofstream f(FileUtils::getPortableWritingPath(base_name + + ".perf-report-" + (Track::getCurrentTrack() != NULL ? Track::getCurrentTrack()->getIdent() : "menu") + ".csv")); + + f << "Total frame count, Total profiling time (ms),"; + f << std::endl; + f << m_total_frames << ", " << int(m_total_frametime / 1000) << ","; + f << std::endl; + f << std::endl; + f << "Steady FPS, Mostly stable FPS, Typical FPS,"; + f << std::endl; + f << m_fps_metrics_low << ", " << m_fps_metrics_mid << ", " << m_fps_metrics_high << ","; + f << std::endl; + f << std::endl; + f << "Target FPS, Slow frames count, Duration of slow frames (ratio), Excess duration of slow frames (ratio),"; + f << std::endl; + + for (unsigned int i = 1; i < MAX_ANALYZED_FPS; i++) + { + float ratio_time_spent = float(m_time_spent_in_slow_frames[i-1]) / float(m_total_frametime); + float ratio_time_waited = float(m_time_waited_in_slow_frames[i-1]) / float(m_total_frametime); + f << i << ", " << m_slow_frames[i-1] << ", " << ratio_time_spent << ", " << ratio_time_waited << ","; + f << std::endl; + } + f.close(); + + // 2: Save per-thread CPU data for (int thread_id = 0; thread_id < m_threads_used; thread_id++) { - std::ofstream f(FileUtils::getPortableWritingPath( - base_name + ".profile-cpu-" + StringUtils::toString(thread_id))); + std::ofstream f(FileUtils::getPortableWritingPath(base_name + + ".profile-" + (Track::getCurrentTrack() != NULL ? Track::getCurrentTrack()->getIdent() : "menu") + + "-cpu-" + StringUtils::toString(thread_id) + ".csv")); ThreadData &td = m_all_threads_data[thread_id]; f << "# "; for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++) - f << "\"" << td.m_ordered_headings[i] << "(" << i+1 <<")\" "; + f << "\"" << td.m_ordered_headings[i] << "(" << i+1 <<")\", "; f << std::endl; int start = m_has_wrapped_around ? m_current_frame + 1 : 0; if (start > m_max_frames) start -= m_max_frames; @@ -592,7 +711,7 @@ void Profiler::writeToFile() for (unsigned int i = 0; i < td.m_ordered_headings.size(); i++) { const EventData &ed = td.m_all_event_data[td.m_ordered_headings[i]]; - f << int(ed.getMarker(start).getDuration()*1000) << " "; + f << int(ed.getMarker(start).getDuration()*1000) << ", "; } // for i i new_headings f << std::endl; start = (start + 1) % m_max_frames; @@ -600,12 +719,15 @@ void Profiler::writeToFile() f.close(); } // for all thread_ids - std::ofstream f_gpu(FileUtils::getPortableWritingPath(base_name + ".profile-gpu")); + + // 3: Save GPU data + std::ofstream f_gpu(FileUtils::getPortableWritingPath(base_name + ".profile-" + + (Track::getCurrentTrack() != NULL ? Track::getCurrentTrack()->getIdent() : "menu") + "-gpu" + ".csv")); f_gpu << "# "; for (unsigned i = 0; i < Q_LAST; i++) { - f_gpu << "\"" << irr_driver->getGPUQueryPhaseName(i) << "(" << i+1 << ")\" "; + f_gpu << "\"" << irr_driver->getGPUQueryPhaseName(i) << "(" << i+1 << ")\", "; } // for i < Q_LAST f_gpu << std::endl; @@ -615,7 +737,7 @@ void Profiler::writeToFile() { for (unsigned i = 0; i < Q_LAST; i++) { - f_gpu << m_gpu_times[start*Q_LAST + i] << " "; + f_gpu << m_gpu_times[start*Q_LAST + i] << ", "; } // for i < Q_LAST f_gpu << std::endl; start = (start + 1) % m_max_frames; diff --git a/src/utils/profiler.hpp b/src/utils/profiler.hpp index 0b09dcc63..5f4ad626a 100644 --- a/src/utils/profiler.hpp +++ b/src/utils/profiler.hpp @@ -68,6 +68,7 @@ extern Profiler profiler; double getTimeMilliseconds(); +#define MAX_ANALYZED_FPS 1000 #define ENABLE_PROFILER #ifdef ENABLE_PROFILER @@ -241,6 +242,35 @@ private: * while a new thread is added. */ Synchronised m_lock; + /** Stores the frame times (in µs), once FPS metrics are computed. */ + std::vector m_frame_times; + + /** Stores the total duration of the frames analyzed (in µs), once FPS metrics + * are computed. Int overflow is not a concern unless more than 30 minutes + * of data is stored and analyzed. */ + int m_total_frametime; + + /** Stores the frame count */ + int m_total_frames; + + /** Store the count of slow frames for a given FPS value */ + int m_slow_frames[MAX_ANALYZED_FPS]; + + /** Store time spent in slow frames for a given FPS value (in µs) */ + int m_time_spent_in_slow_frames[MAX_ANALYZED_FPS]; + + /** Store time spent beyond the maximum duration of a frame for a given FPS value (in µs) */ + int m_time_waited_in_slow_frames[MAX_ANALYZED_FPS]; + + /** Store the highest FPS with <50% time in slow frames and < 10% time waited beyond maximum duration */ + int m_fps_metrics_high; + + /** Store the highest FPS with <12% time in slow frames and < 2% time waited beyond maximum duration */ + int m_fps_metrics_mid; + + /** Store the highest FPS with < 1% time in slow frames and <0.1% time waited beyond maximum duration */ + int m_fps_metrics_low; + /** True if the circular buffer has wrapped around. */ bool m_has_wrapped_around; @@ -291,6 +321,7 @@ public: void synchronizeFrame(); void draw(); void onClick(const core::vector2di& mouse_pos); + void computeStableFPS(); void writeToFile(); // ------------------------------------------------------------------------