Skip to content

Commit a1f63d3

Browse files
committed
Added explicit DwmFlush (...) on a dedicated thread to sample DXGI's VBLANK timing information
1 parent 92c8153 commit a1f63d3

File tree

8 files changed

+163
-99
lines changed

8 files changed

+163
-99
lines changed

CHANGELOG.txt

+17-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
1-
24.9.16
1+
24.9.16.1
2+
=========
3+
+ Added explicit DwmFlush (...) on a dedicated thread to sample DXGI's
4+
VBLANK timing information so that it doesn't wobble on drivers that
5+
support HW Flip Queue.
6+
7+
+ Added CPU affinity INI option
8+
9+
[Scheduler.Boost]
10+
ProcessorAffinityMask=[0,0xFFFFFFFFFFFFFFFF] Default=0xFFFFFFFFFFFFFFFF
11+
12+
This is a bitmask, so map out the processor numbers you want in binary
13+
and then convert to decimal.
14+
15+
(e.g. ProcessorAffinityMask=1365 -> 10101010101 (6 physical cores))
16+
17+
24.9.16
218
=======
319
+ Reduced overhead and added smoothing to real-time VRR Rate calculation
420

include/SpecialK/DLL_VERSION.H

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
#define SK_YEAR 24
44
#define SK_MONTH 9
55
#define SK_DATE 16
6-
#define SK_REV_N 0
7-
#define SK_REV 0
6+
#define SK_REV_N 1
7+
#define SK_REV 1
88

99
#ifndef _A2
1010
#define _A2(a) #a

include/SpecialK/config.h

+1
Original file line numberDiff line numberDiff line change
@@ -1326,6 +1326,7 @@ struct sk_config_t
13261326
bool deny_foreign_change = true;
13271327
int minimum_render_prio = THREAD_PRIORITY_ABOVE_NORMAL;
13281328
DWORD available_cpu_cores = 1UL;
1329+
int64_t cpu_affinity_mask = 0xFFFFFFFFULL;
13291330
} priority;
13301331

13311332
struct skif_s {

src/config.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,7 @@ struct {
11271127
sk::ParameterBool* highest_priority = nullptr;
11281128
sk::ParameterBool* deny_foreign_change = nullptr;
11291129
sk::ParameterInt* min_render_priority = nullptr;
1130+
sk::ParameterInt64* cpu_affinity_mask = nullptr;
11301131
} priority;
11311132
} scheduling;
11321133

@@ -2007,6 +2008,7 @@ auto DeclKeybind =
20072008
ConfigEntry (scheduling.priority.highest_priority, L"Boost process priority to High instead of Above Normal", dll_ini, L"Scheduler.Boost", L"RaisePriorityToHigh"),
20082009
ConfigEntry (scheduling.priority.deny_foreign_change,L"Do not allow third-party apps to change priority", dll_ini, L"Scheduler.Boost", L"DenyForeignChanges"),
20092010
ConfigEntry (scheduling.priority.min_render_priority,L"Minimum priority for a game's render thread", dll_ini, L"Scheduler.Boost", L"MinimumRenderThreadPriority"),
2011+
ConfigEntry (scheduling.priority.cpu_affinity_mask, L"Mask of CPU cores the process is eligible for scheduling.", dll_ini, L"Scheduler.Boost", L"ProcessorAffinityMask"),
20102012

20112013
ConfigEntry (sound.minimize_latency, L"Minimize Audio Latency while Game is Running", dll_ini, L"Sound.Mixing", L"MinimizeLatency"),
20122014

@@ -4105,6 +4107,12 @@ auto DeclKeybind =
41054107
scheduling.priority.highest_priority->load (config.priority.highest_priority);
41064108
scheduling.priority.deny_foreign_change->load (config.priority.deny_foreign_change);
41074109
scheduling.priority.min_render_priority->load (config.priority.minimum_render_prio);
4110+
scheduling.priority.cpu_affinity_mask->load (config.priority.cpu_affinity_mask);
4111+
4112+
if (config.priority.cpu_affinity_mask != 0xFFFFFFFFULL)
4113+
{
4114+
SetProcessAffinityMask (GetCurrentProcess (), (DWORD_PTR)config.priority.cpu_affinity_mask);
4115+
}
41084116

41094117
if (config.priority.raise_always)
41104118
SetPriorityClass (GetCurrentProcess (), ABOVE_NORMAL_PRIORITY_CLASS);
@@ -6093,6 +6101,7 @@ SK_SaveConfig ( std::wstring name,
60936101
scheduling.priority.highest_priority->store (config.priority.highest_priority);
60946102
scheduling.priority.deny_foreign_change->store (config.priority.deny_foreign_change);
60956103
scheduling.priority.min_render_priority->store (config.priority.minimum_render_prio);
6104+
scheduling.priority.cpu_affinity_mask->store (config.priority.cpu_affinity_mask);
60966105

60976106
if (render.framerate.rescan_ratio != nullptr)
60986107
{

src/core.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -3349,6 +3349,9 @@ void
33493349
__stdcall
33503350
SK_BeginBufferSwapEx (BOOL bWaitOnFail)
33513351
{
3352+
void SK_Render_CountVBlanks (void);
3353+
SK_Render_CountVBlanks ();
3354+
33523355
if (config.render.framerate.enable_mmcss)
33533356
{
33543357
SK_MMCS_BeginBufferSwap ();

src/framerate.cpp

-84
Original file line numberDiff line numberDiff line change
@@ -2659,90 +2659,6 @@ SK::Framerate::DeepFrameState::reset (void)
26592659
std::vector <double>&
26602660
SK::Framerate::Stats::sortAndCacheFrametimeHistory (void) //noexcept
26612661
{
2662-
SK_RunOnce (
2663-
{
2664-
SK_Thread_CreateEx ([](LPVOID) -> DWORD
2665-
{
2666-
DXGI_FRAME_STATISTICS
2667-
frameStats = {};
2668-
2669-
auto& rb =
2670-
SK_GetCurrentRenderBackend ();
2671-
2672-
while (WaitForSingleObject (__SK_DLL_TeardownEvent, 1) != WAIT_OBJECT_0)
2673-
{
2674-
SK_ComQIPtr <IDXGISwapChain>
2675-
pSwapChain (rb.swapchain.p);
2676-
if (pSwapChain.p != nullptr && SUCCEEDED (pSwapChain->GetFrameStatistics (&frameStats)))
2677-
{ pSwapChain.Release ();
2678-
2679-
auto& nvapi_display =
2680-
rb.displays [rb.active_display].nvapi;
2681-
2682-
if (nvapi_display.display_handle != nullptr)
2683-
{
2684-
ULONGLONG& kSyncQPC =
2685-
(ULONGLONG&)frameStats.SyncQPCTime.QuadPart;
2686-
2687-
if (nvapi_display.vblank_counter.last_qpc_refreshed < kSyncQPC &&
2688-
nvapi_display.vblank_counter.addRecord (
2689-
nvapi_display.display_handle, &frameStats,(NvU32)(kSyncQPC /
2690-
SK_QpcTicksPerMs)))
2691-
{
2692-
nvapi_display.vblank_counter.last_qpc_refreshed =
2693-
kSyncQPC;
2694-
}
2695-
}
2696-
}
2697-
2698-
else if (sk::NVAPI::nv_hardware)
2699-
{
2700-
pSwapChain.Release ();
2701-
2702-
//
2703-
// Sample NVIDIA's VBlank counter from this thread, because that API
2704-
// has massive performance penalties and this thread runs constantly
2705-
// with little to no real workload.
2706-
//
2707-
auto& nvapi_display =
2708-
rb.displays [rb.active_display].nvapi;
2709-
2710-
if (nvapi_display.display_handle != nullptr)
2711-
{
2712-
bool got_new_reading = false;
2713-
2714-
const auto current_frame =
2715-
SK_GetFramesDrawn ();
2716-
2717-
while (! got_new_reading)
2718-
{
2719-
if (nvapi_display.vblank_counter.last_frame_sampled < current_frame - 1 &&
2720-
nvapi_display.vblank_counter.addRecord (
2721-
nvapi_display.display_handle, nullptr, SK_timeGetTime ()))
2722-
{
2723-
nvapi_display.vblank_counter.last_frame_sampled = current_frame;
2724-
got_new_reading = true;
2725-
}
2726-
2727-
else
2728-
{
2729-
if (WaitForSingleObject (__SK_DLL_TeardownEvent, 5) != WAIT_OBJECT_0)
2730-
continue;
2731-
2732-
break;
2733-
}
2734-
}
2735-
}
2736-
}
2737-
}
2738-
2739-
SK_Thread_CloseSelf ();
2740-
2741-
return 0;
2742-
}, L"[SK] VBlank Counter", nullptr);
2743-
});
2744-
2745-
27462662
#pragma warning (push)
27472663
#pragma warning (disable: 4244)
27482664
if (! InterlockedCompareExchange (&worker._init, 1, 0))

src/nvapi.cpp

+23-12
Original file line numberDiff line numberDiff line change
@@ -1236,7 +1236,7 @@ SK_RenderBackend_V2::output_s::nvapi_ctx_s::vblank_history_s::addRecord (NvDispl
12361236

12371237
head = std::min (head, (NvU32)MaxVBlankRecords-1);
12381238

1239-
if (vblank_count != records [head].vblank_count)
1239+
if (vblank_count > records [head].vblank_count)
12401240
{
12411241
if ( head == MaxVBlankRecords-1 )
12421242
head = 0;
@@ -1284,6 +1284,20 @@ SK_RenderBackend_V2::output_s::nvapi_ctx_s::vblank_history_s::getVBlankHz (NvU32
12841284
NvU32 vblank_t0 = UINT32_MAX,
12851285
vblank_n = 0;
12861286

1287+
for ( UINT record_idx = 0 ;
1288+
record_idx < MaxVBlankRecords ;
1289+
++record_idx )
1290+
{
1291+
const auto& record =
1292+
records [record_idx];
1293+
1294+
if (vblank_t0 > record.timestamp_ms)
1295+
vblank_t0 = record.timestamp_ms;
1296+
}
1297+
1298+
if (vblank_t0 > tNow)
1299+
vblank_t0 = tNow;
1300+
12871301
for ( UINT record_idx = 0 ;
12881302
record_idx < MaxVBlankRecords ;
12891303
++record_idx )
@@ -1292,13 +1306,10 @@ SK_RenderBackend_V2::output_s::nvapi_ctx_s::vblank_history_s::getVBlankHz (NvU32
12921306
records [record_idx];
12931307

12941308
if ( record.timestamp_ms != 0 &&
1295-
record.timestamp_ms >= (tNow - 750) ) // Use the 3/4 of a second
1309+
record.timestamp_ms >= (vblank_t0 - 500) )
12961310
{
12971311
++num_vblanks_in_period;
12981312

1299-
if ( vblank_t0 > record.timestamp_ms)
1300-
vblank_t0 = record.timestamp_ms;
1301-
13021313
if ( vblank_n < record.timestamp_ms)
13031314
vblank_n = record.timestamp_ms;
13041315

@@ -1333,15 +1344,15 @@ SK_RenderBackend_V2::output_s::nvapi_ctx_s::vblank_history_s::getVBlankHz (NvU32
13331344
static_cast <double> (rb.displays [rb.active_display].signal.timing.vsync_freq.Denominator)
13341345
);
13351346

1336-
if ( last_average > _MaxExpectedRefresh) last_average = _MaxExpectedRefresh * 1.01f;
1337-
if (last_last_average > _MaxExpectedRefresh) last_last_average = _MaxExpectedRefresh * 1.01f;
1338-
if ( new_average > _MaxExpectedRefresh) new_average = _MaxExpectedRefresh * 1.01f;
1347+
if ( last_average > _MaxExpectedRefresh) last_average = _MaxExpectedRefresh;
1348+
if (last_last_average > _MaxExpectedRefresh) last_last_average = _MaxExpectedRefresh;
1349+
if ( new_average > _MaxExpectedRefresh) new_average = _MaxExpectedRefresh;
13391350

13401351
if (last_average != 0.0f)
13411352
{
13421353
// Weighted rolling-average because this is really jittery
13431354
new_average =
1344-
(1.0f * last_average + 5.0f * new_average + 2.5f * last_last_average) / 8.5f;
1355+
(4.0f * last_average + 15.0f * new_average + last_last_average) * 0.05f;
13451356
}
13461357

13471358
last_last_average = last_average;
@@ -1350,14 +1361,14 @@ SK_RenderBackend_V2::output_s::nvapi_ctx_s::vblank_history_s::getVBlankHz (NvU32
13501361
static DWORD dwLastUpdate = tNow;
13511362
static float fLastAverage = last_average;
13521363

1353-
if (dwLastUpdate < tNow - 266)
1364+
if (dwLastUpdate < tNow - 333)
13541365
{ dwLastUpdate = tNow;
13551366

13561367
float fNewAverage =
1357-
(fLastAverage + 3 * last_average) / 4.0f;
1368+
new_average;
13581369

13591370
if (fNewAverage > _MaxExpectedRefresh)
1360-
fNewAverage = _MaxExpectedRefresh * 1.01f;
1371+
fNewAverage = _MaxExpectedRefresh;
13611372

13621373
fLastAverage = fNewAverage;
13631374
}

src/render/render_backend.cpp

+108
Original file line numberDiff line numberDiff line change
@@ -4857,4 +4857,112 @@ SK_RenderBackend_V2::output_s::setSDRWhiteLevel (float fNits)
48574857
}
48584858

48594859
return false;
4860+
}
4861+
4862+
4863+
void
4864+
SK_Render_CountVBlanks ()
4865+
{
4866+
static HANDLE hVRREvent =
4867+
SK_CreateEvent (nullptr, FALSE, TRUE, FALSE);
4868+
4869+
SK_RunOnce (
4870+
{
4871+
SK_Thread_CreateEx ([](LPVOID) -> DWORD
4872+
{
4873+
DXGI_FRAME_STATISTICS
4874+
frameStats = {};
4875+
4876+
auto& rb =
4877+
SK_GetCurrentRenderBackend ();
4878+
4879+
HANDLE vrr_events [] = { __SK_DLL_TeardownEvent, hVRREvent };
4880+
while (WaitForMultipleObjects (2, vrr_events, FALSE, 125) != WAIT_OBJECT_0)
4881+
{
4882+
SK_ComQIPtr <IDXGISwapChain> pSwapChain (rb.swapchain.p);
4883+
SK_ComPtr <IDXGIOutput> pOutput;
4884+
4885+
if ( pSwapChain.p != nullptr &&
4886+
SUCCEEDED (pSwapChain->GetContainingOutput (&pOutput.p)))
4887+
{
4888+
pSwapChain.Release ();
4889+
4890+
DwmFlush ();
4891+
4892+
pOutput->WaitForVBlank ();
4893+
4894+
pSwapChain = rb.swapchain.p;
4895+
pSwapChain->GetFrameStatistics (&frameStats);
4896+
}
4897+
4898+
if (pSwapChain.p != nullptr)
4899+
{
4900+
pSwapChain.Release ();
4901+
4902+
auto& nvapi_display =
4903+
rb.displays [rb.active_display].nvapi;
4904+
4905+
if (nvapi_display.display_handle != nullptr)
4906+
{
4907+
ULONGLONG& kSyncQPC =
4908+
(ULONGLONG&)frameStats.SyncQPCTime.QuadPart;
4909+
4910+
if (nvapi_display.vblank_counter.last_qpc_refreshed < kSyncQPC &&
4911+
nvapi_display.vblank_counter.addRecord (
4912+
nvapi_display.display_handle, &frameStats,(UINT32)(kSyncQPC/SK_QpcTicksPerMs)))
4913+
{
4914+
nvapi_display.vblank_counter.last_qpc_refreshed =
4915+
kSyncQPC;
4916+
}
4917+
}
4918+
}
4919+
4920+
else if (sk::NVAPI::nv_hardware)
4921+
{
4922+
pSwapChain.Release ();
4923+
4924+
//
4925+
// Sample NVIDIA's VBlank counter from this thread, because that API
4926+
// has massive performance penalties and this thread runs constantly
4927+
// with little to no real workload.
4928+
//
4929+
auto& nvapi_display =
4930+
rb.displays [rb.active_display].nvapi;
4931+
4932+
if (nvapi_display.display_handle != nullptr)
4933+
{
4934+
bool got_new_reading = false;
4935+
4936+
while (! got_new_reading)
4937+
{
4938+
const auto current_frame =
4939+
SK_GetFramesDrawn ();
4940+
4941+
if (nvapi_display.vblank_counter.last_frame_sampled < current_frame &&
4942+
nvapi_display.vblank_counter.addRecord (
4943+
nvapi_display.display_handle, nullptr, SK_timeGetTime ()))
4944+
{
4945+
nvapi_display.vblank_counter.last_frame_sampled = current_frame;
4946+
got_new_reading = true;
4947+
}
4948+
4949+
else
4950+
{
4951+
if (WaitForSingleObject (__SK_DLL_TeardownEvent, 2) != WAIT_OBJECT_0)
4952+
continue;
4953+
4954+
break;
4955+
}
4956+
}
4957+
}
4958+
}
4959+
}
4960+
4961+
SK_Thread_CloseSelf ();
4962+
4963+
return 0;
4964+
}, L"[SK] VBlank Counter", nullptr);
4965+
});
4966+
4967+
SetEvent (hVRREvent);
48604968
}

0 commit comments

Comments
 (0)