
Commit 7c15075

clear L2 cache

1 parent: cf7371e

2 files changed: +13 −2 lines

problems/bioml/trimul/eval.py

2 additions & 1 deletion

@@ -11,7 +11,7 @@

 import torch.cuda

-from utils import set_seed
+from utils import set_seed, clear_l2_cache
 try:
     from task import TestSpec
 except ImportError:

@@ -232,6 +232,7 @@ def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_t
     start_event = torch.cuda.Event(enable_timing=True)
     end_event = torch.cuda.Event(enable_timing=True)
     start_event.record()
+    clear_l2_cache()
     output = custom_kernel(data)
     end_event.record()
     torch.cuda.synchronize()
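
For orientation, here is a minimal sketch of the timing pattern this hunk modifies. The CUDA-event lines and the `clear_l2_cache()` call mirror the diff; the `time_once` wrapper name and its signature are hypothetical, and the repeat/timeout logic of `_run_single_benchmark` is omitted.

```python
import torch
from utils import clear_l2_cache  # repo-local helper added in this commit

def time_once(custom_kernel, data):
    """Hypothetical wrapper: time a single kernel invocation with CUDA events."""
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    start_event.record()
    clear_l2_cache()          # placed after start_event.record() in this commit,
                              # so the flush itself falls inside the timed interval
    output = custom_kernel(data)
    end_event.record()
    torch.cuda.synchronize()  # make sure both events have completed before reading them
    return output, start_event.elapsed_time(end_event)  # elapsed time in milliseconds
```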

problems/bioml/trimul/utils.py

11 additions & 1 deletion

@@ -2,6 +2,7 @@
 from typing import Tuple

 import numpy as np
+import cupy as cp
 import torch


@@ -156,4 +157,13 @@ def __enter__(self):

     def __exit__(self, exc_type, exc_value, traceback):
         torch.backends.cudnn.allow_tf32 = self.allow_tf32
-        torch.backends.cudnn.deterministic = self.deterministic
+        torch.backends.cudnn.deterministic = self.deterministic
+
+
+def clear_l2_cache():
+    cp.cuda.runtime.cudaDeviceSetLimit(cp.cuda.runtime.cudaLimitPersistingL2CacheSize, 0)
+    # create a large dummy tensor
+    dummy = torch.empty((32, 1024, 1024), dtype=torch.int64, device="cuda")
+    # write stuff to it
+    dummy.fill_(42)
+    del dummy
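
The new helper does two things: it sets the persisting-L2 carve-out to zero through CuPy's runtime bindings, then streams a 256 MiB write (32 × 1024 × 1024 int64 values) through the cache to evict resident lines. If pulling in CuPy is undesirable, roughly the same eviction effect can be sketched with PyTorch alone; the variant below is an assumption-laden sketch, not the repo's helper, and it simply skips the device-limit step.

```python
import torch

def clear_l2_cache_torch_only():
    # Sketch of an L2 flush without the CuPy dependency: allocate a buffer that is
    # (much) larger than a typical GPU L2 cache and write every element so the
    # writes sweep through L2 and evict whatever the previous kernel left behind.
    # Note: this skips the cudaDeviceSetLimit(cudaLimitPersistingL2CacheSize, 0)
    # step that the repo's clear_l2_cache() performs via cupy.
    dummy = torch.empty((32, 1024, 1024), dtype=torch.int64, device="cuda")  # 256 MiB
    dummy.fill_(42)
    del dummy

clear_l2_cache_torch_only()  # call between benchmark iterations for cold-cache timings
```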
