Skip to content

[AMDGPU] Simplify the logic in checkWMMACoexecutionHazards, NFC #200717

Merged
changpeng merged 6 commits into
llvm:main from
changpeng:wmma-coexecution-hazard-simplify
Jun 1, 2026
Merged

[AMDGPU] Simplify the logic in checkWMMACoexecutionHazards, NFC #200717
changpeng merged 6 commits into
llvm:main from
changpeng:wmma-coexecution-hazard-simplify

Conversation

@changpeng
Copy link
Copy Markdown
Contributor

No description provided.

@llvmorg-github-actions
Copy link
Copy Markdown

@llvm/pr-subscribers-backend-amdgpu

Author: Changpeng Fang (changpeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/200717.diff

1 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (+53-60)
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8ab90d953200e..368f84e193a4f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2048,45 +2048,49 @@ static bool isCoexecutableVALUInst(const MachineInstr &MI) {
          !SIInstrInfo::isSWMMAC(MI) && !SIInstrInfo::isLDSDMA(MI);
 }
 
-static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
-                                       const SIInstrInfo *TII, unsigned Latency,
-                                       unsigned Category) {
-  assert(TII->isXDLWMMA(MI) && (Latency == 8 || Latency == 16) &&
-         "Handle me if the xdl wmma instruction latency changes");
-
-  switch (Category) {
-  case 0: // Dense WMMA Instructions:
-          //   WMMA_*F16, WMMA_*BF16
-          //   WMMA_*FP8FP8
-          //   WMMA_*FP8BF8
-          //   WMMA_*BF8FP8
-          //   WMMA_*BF8BF8
-          //   WMMA_*F8F6F4 if SRCA & SRCB != F8
-    return Latency == 8 && SIInstrInfo::isWMMA(MI);
-
-  case 1: // Dense WMMA Instructions:
-          //   WMMA_IU8
-          //   WMMA_IU4
-          //   WMMA_*F8F6F4 if SRCA OR SRCB == F8
-    return Latency == 16 && SIInstrInfo::isWMMA(MI);
-
-  case 2: // Dense SWMMAC Instructions
-          //   SWMMAC_*F16, SWMMAC_*BF16,
-          //   SWMMAC_*FP8FP8
-          //   SWMMAC_*BF8FP8
-          //   SWMMAC_*FP8BF8
-          //   SWMMAC_*BF8BF8
-    return Latency == 8 && SIInstrInfo::isSWMMAC(MI);
-
-  case 3: // Sparse WMMA Instructions:
-          //   SWMMAC_IU8
-          //   SWMMAC_IU4
-    return Latency == 16 && SIInstrInfo::isSWMMAC(MI);
-  default:
+// Classify XDL WMMA instructions into co-execution hazard categories
+// (Refer to SPG 4.6.12.1), mainly based on instruction latency.
+//
+// Category 0: WMMA with Latency 8
+//   WMMA_*F16, WMMA_*BF16
+//   WMMA_*FP8FP8
+//   WMMA_*FP8BF8
+//   WMMA_*BF8FP8
+//   WMMA_*BF8BF8
+//   WMMA_*F8F6F4 if SRCA & SRCB != F8
+//
+// Category 1: WMMA with Latency 16
+//   WMMA_IU8
+//   WMMA_*F8F6F4 if SRCA OR SRCB == F8
+//
+// Category 2: SWMMAC with Latency 8
+//   SWMMAC_*F16, SWMMAC_*BF16,
+//   SWMMAC_*FP8FP8
+//   SWMMAC_*BF8FP8
+//   SWMMAC_*FP8BF8
+//   SWMMAC_*BF8BF8
+//
+// Category 3: SWMMAC with Latency 16
+//   SWMMAC_IU8
+static unsigned getWMMAHazardInstInCategory(const MachineInstr &MI,
+                                            const SIInstrInfo *TII,
+                                            unsigned Latency) {
+  assert(TII->isXDLWMMA(MI) && "must be xdl wmma");
+  bool IsSWMMAC = SIInstrInfo::isSWMMAC(MI);
+  unsigned Category = 0;
+
+  switch (Latency) {
+  case 8:
+    Category =  IsSWMMAC ? 2 : 0;
+    break;
+  case 16:
+    Category =  IsSWMMAC ? 3 : 1;
     break;
+  default:
+    llvm_unreachable("unexpected xdl wmma latency");
   } // end switch.
 
-  return false;
+  return Category;
 }
 
 int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
@@ -2111,8 +2115,7 @@ int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
       return false;
 
     unsigned Latency = TSchedModel.computeInstrLatency(&I);
-    if (!IsWMMAHazardInstInCategory(I, TII, Latency, Category))
-      return false;
+    Category = getWMMAHazardInstInCategory(I, TII, Latency);
 
     return hasWMMAToWMMARegOverlap(I, *MI);
   };
@@ -2122,39 +2125,29 @@ int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
       return false;
 
     unsigned Latency = TSchedModel.computeInstrLatency(&I);
-    if (!IsWMMAHazardInstInCategory(I, TII, Latency, Category))
-      return false;
+    Category = getWMMAHazardInstInCategory(I, TII, Latency);
 
     return hasWMMAToVALURegOverlap(I, *MI);
   };
 
-  int Limit = 0;
-
   auto GetWaitStatesFn = [](const MachineInstr &I) {
     return SIInstrInfo::isVALU(I) ? 1 : 0;
   };
 
   int WaitStatesNeeded = -1;
+  int ExistingVALUs = 0; // Existing number of VALU ops in between.
+
+  // getWaitStatesSince checks for a hazard between instruction 'I' and 'MI':
+  // - If a hazard exists: returns the number of VALUs in between and sets
+  //   'Category' via IsWMMAHazardFn/IsVALUHazardFn for instruction 'I'.
+  // - If no hazard exists: returns INT_MAX, making WaitStatesNeeded negative,
+  //   so no V_NOP insertion is needed.
   if (TII->isXDLWMMA(*MI)) {
-    for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
-      Limit = WMMAWaitStates[Category]; // for IsExpiredFn.
-      // 'getWaitStatesSince' returns the number of VALUs in between if hazard
-      // exists, and INT_MAX if there is no hazard. As a result, a negative
-      // WaitStatesNeeded here means no hazard, and we will continue to search
-      // for other categories.
-      WaitStatesNeeded =
-          Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
-    }
+    ExistingVALUs = getWaitStatesSince(IsWMMAHazardFn, 9, GetWaitStatesFn);
+    WaitStatesNeeded = WMMAWaitStates[Category] - ExistingVALUs;
   } else { // Must be a co-executable VALU.
-    for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
-      Limit = VALUWaitStates[Category]; // for IsExpiredFn.
-      // 'getWaitStatesSince' returns the number of VALUs in between if hazard
-      // exists, and INT_MAX if there is no hazard. As a result, a negative
-      // WaitStatesNeeded here means no hazard, and we will continue to search
-      // for other categories.
-      WaitStatesNeeded =
-          Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn);
-    }
+    ExistingVALUs = getWaitStatesSince(IsVALUHazardFn, 8, GetWaitStatesFn);
+    WaitStatesNeeded = VALUWaitStates[Category] - ExistingVALUs;
   }
 
   return WaitStatesNeeded;

@github-actions
Copy link
Copy Markdown

github-actions Bot commented Jun 1, 2026

✅ With the latest revision this PR passed the C/C++ code formatter.

Comment thread llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Outdated
WaitStatesNeeded =
Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
}
ExistingVALUs = getWaitStatesSince(IsWMMAHazardFn, 9, GetWaitStatesFn);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please define these 9 and 8 as `const int` values with meaningful names in the function.

@changpeng changpeng merged commit 6bbbdba into llvm:main Jun 1, 2026
10 checks passed
@changpeng changpeng deleted the wmma-coexecution-hazard-simplify branch June 1, 2026 22:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants