From 52e69591add6a65a538614379cd6c2a3c4c92240 Mon Sep 17 00:00:00 2001
From: Greg Beard
Date: Sat, 2 Dec 2023 09:01:58 +0000
Subject: [PATCH] feat(performance) Multiple performance improvements and other fixes

Performance:
- Adds dedicated thread for dequeueing and writing encoded packets
- Uses native (BGRA) colourspace for NvFBC capture
  Using the native colourspace means NvFBC doesn't have to perform any
  conversion. In theory, this will improve capture performance
- Allows NvFBC to wait up to a maximum time for a new frame

Fixes:
- Adds eventfd for cancelling contexts without timers
  Contexts without timers require this to be cancellable with
  `request_stop()`.
- Fixes memory leak in h/w frame context initialization

Other:
- Adds metrics for video and audio encoding frame times
- Adds performance report generator script
- Allows NvFBC adjusted FPS to be enabled/disabled with ENV var
  `SHADOW_CAST_STRICT_FPS`. Defaults to `1`.
- Adds FAQ
---
 .gitignore | 1 +
 .versioning/changes/DyxXCHexJS.minor.md | 1 +
 .versioning/changes/S9GbUeHXdB.minor.md | 2 +-
 .versioning/changes/UaFV3jPgO3.minor.md | 1 +
 .versioning/changes/wekMseGz7z.patch.md | 1 +
 README.md | 19 +-
 doc/faq.md | 24 +++
 src/CMakeLists.txt | 5 +-
 src/av/codec.cpp | 19 +-
 src/av/codec.hpp | 2 +-
 src/handlers/audio_chunk_writer.cpp | 26 ++-
 src/handlers/audio_chunk_writer.hpp | 10 +-
 src/handlers/drm_video_frame_writer.cpp | 69 +++----
 src/handlers/drm_video_frame_writer.hpp | 14 +-
 src/handlers/video_frame_writer.cpp | 71 +++----
 src/handlers/video_frame_writer.hpp | 16 +-
 src/main.cpp | 243 ++++++++++++++++--------
 src/nvidia.cpp | 7 +-
 src/nvidia.hpp | 2 +-
 src/services.hpp | 2 +
 src/services/audio_service.cpp | 3 +-
 src/services/context.cpp | 47 ++++-
 src/services/context.hpp | 5 +-
 src/services/drm_video_service.cpp | 4 +-
 src/services/drm_video_service.hpp | 5 +-
 src/services/encoder.cpp | 37 ++++
 src/services/encoder.hpp | 27 +++
 src/services/encoder_service.cpp | 151 +++++++++++++++
 src/services/encoder_service.hpp | 83 ++++++++
 src/services/video_service.cpp | 14 +-
 src/services/video_service.hpp | 3 +-
 src/utils.hpp | 1 +
 src/utils/cmd_line.cpp | 20 +-
 src/utils/cmd_line.hpp | 4 +-
 src/utils/frame_time.cpp | 49 +++++
 src/utils/frame_time.hpp | 26 +++
 src/utils/pool.hpp | 50 ++++-
 tools/make-dist | 1 +
 tools/metrics/average | 10 +-
 tools/metrics/percentile | 12 +-
 tools/metrics/plot | 19 +-
 tools/next-version | 25 +++
 tools/performance-report | 97 ++++++++++
 43 files changed, 974 insertions(+), 254 deletions(-)
 create mode 100644 .versioning/changes/DyxXCHexJS.minor.md
 create mode 100644 .versioning/changes/UaFV3jPgO3.minor.md
 create mode 100644 .versioning/changes/wekMseGz7z.patch.md
 create mode 100644 doc/faq.md
 create mode 100644 src/services/encoder.cpp
 create mode 100644 src/services/encoder.hpp
 create mode 100644 src/services/encoder_service.cpp
 create mode 100644 src/services/encoder_service.hpp
 create mode 100644 src/utils/frame_time.cpp
 create mode 100644 src/utils/frame_time.hpp
 create mode 100755 tools/next-version
 create mode 100755 tools/performance-report

diff --git a/.gitignore b/.gitignore
index 272d0fd..57fae5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,4 @@ build/
 .private/
 experimental/
 dist/
+report/
diff --git a/.versioning/changes/DyxXCHexJS.minor.md b/.versioning/changes/DyxXCHexJS.minor.md
new file mode 100644
index 0000000..5bbb8f9
--- /dev/null
+++ b/.versioning/changes/DyxXCHexJS.minor.md
@@ -0,0 +1 @@
+Improves encoding performance by using a dedicated thread
diff --git a/.versioning/changes/S9GbUeHXdB.minor.md b/.versioning/changes/S9GbUeHXdB.minor.md
index 1ece5ab..d243ce9 100644
--- a/.versioning/changes/S9GbUeHXdB.minor.md
+++ b/.versioning/changes/S9GbUeHXdB.minor.md
@@ -1 +1 @@
-Adds a build-time option to enable timing metric collection for audio & video
+Adds a build-time option to enable frame time metric collection
diff --git a/.versioning/changes/UaFV3jPgO3.minor.md
b/.versioning/changes/UaFV3jPgO3.minor.md new file mode 100644 index 0000000..c72f8d1 --- /dev/null +++ b/.versioning/changes/UaFV3jPgO3.minor.md @@ -0,0 +1 @@ +Reduces frame "jitter" when capturing certain games in X11 diff --git a/.versioning/changes/wekMseGz7z.patch.md b/.versioning/changes/wekMseGz7z.patch.md new file mode 100644 index 0000000..c4b2067 --- /dev/null +++ b/.versioning/changes/wekMseGz7z.patch.md @@ -0,0 +1 @@ +Fixes a memory leak in H/W encoding pipeline diff --git a/README.md b/README.md index c32a385..51c9319 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Typical screen capture utilities copy the framebuffer data between host and GPU - [Building from source](#building-from-source) - [Installing](#installing) - [Alternative projects](#some-alternative-projects) +- [FAQ](doc/faq.md) #### Example - Cyberpunk 2077 [![Cyberpunk 2077](http://i3.ytimg.com/vi/frXGxrdgTLY/hqdefault.jpg)](https://www.youtube.com/watch?v=frXGxrdgTLY) @@ -26,21 +27,15 @@ The resulting media/container type is determined by the extension of `` - All options available to `ffmpeg` should work here. Defaults to `libopus` -- `-V