[AMDGPU] Simplify the logic in checkWMMACoexecutionHazards, NFC #200717
Merged
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Changpeng Fang (changpeng) Changes: 1 File Affected:
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8ab90d953200e..368f84e193a4f 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2048,45 +2048,49 @@ static bool isCoexecutableVALUInst(const MachineInstr &MI) {
!SIInstrInfo::isSWMMAC(MI) && !SIInstrInfo::isLDSDMA(MI);
}
-static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
- const SIInstrInfo *TII, unsigned Latency,
- unsigned Category) {
- assert(TII->isXDLWMMA(MI) && (Latency == 8 || Latency == 16) &&
- "Handle me if the xdl wmma instruction latency changes");
-
- switch (Category) {
- case 0: // Dense WMMA Instructions:
- // WMMA_*F16, WMMA_*BF16
- // WMMA_*FP8FP8
- // WMMA_*FP8BF8
- // WMMA_*BF8FP8
- // WMMA_*BF8BF8
- // WMMA_*F8F6F4 if SRCA & SRCB != F8
- return Latency == 8 && SIInstrInfo::isWMMA(MI);
-
- case 1: // Dense WMMA Instructions:
- // WMMA_IU8
- // WMMA_IU4
- // WMMA_*F8F6F4 if SRCA OR SRCB == F8
- return Latency == 16 && SIInstrInfo::isWMMA(MI);
-
- case 2: // Dense SWMMAC Instructions
- // SWMMAC_*F16, SWMMAC_*BF16,
- // SWMMAC_*FP8FP8
- // SWMMAC_*BF8FP8
- // SWMMAC_*FP8BF8
- // SWMMAC_*BF8BF8
- return Latency == 8 && SIInstrInfo::isSWMMAC(MI);
-
- case 3: // Sparse WMMA Instructions:
- // SWMMAC_IU8
- // SWMMAC_IU4
- return Latency == 16 && SIInstrInfo::isSWMMAC(MI);
- default:
+// Classify XDL WMMA instructions into co-execution hazard categories
+// (Refer to SPG 4.6.12.1), mainly based on instruction latency.
+//
+// Category 0: WMMA with Latency 8
+// WMMA_*F16, WMMA_*BF16
+// WMMA_*FP8FP8
+// WMMA_*FP8BF8
+// WMMA_*BF8FP8
+// WMMA_*BF8BF8
+// WMMA_*F8F6F4 if SRCA & SRCB != F8
+//
+// Category 1: WMMA with Latency 16
+// WMMA_IU8
+// WMMA_*F8F6F4 if SRCA OR SRCB == F8
+//
+// Category 2: SWMMAC with Latency 8
+// SWMMAC_*F16, SWMMAC_*BF16,
+// SWMMAC_*FP8FP8
+// SWMMAC_*BF8FP8
+// SWMMAC_*FP8BF8
+// SWMMAC_*BF8BF8
+//
+// Category 3: SWMMAC with Latency 16
+// SWMMAC_IU8
+static unsigned getWMMAHazardInstInCategory(const MachineInstr &MI,
+ const SIInstrInfo *TII,
+ unsigned Latency) {
+ assert(TII->isXDLWMMA(MI) && "must be xdl wmma");
+ bool IsSWMMAC = SIInstrInfo::isSWMMAC(MI);
+ unsigned Category = 0;
+
+ switch (Latency) {
+ case 8:
+ Category = IsSWMMAC ? 2 : 0;
+ break;
+ case 16:
+ Category = IsSWMMAC ? 3 : 1;
break;
+ default:
+ llvm_unreachable("unexpected xdl wmma latency");
} // end switch.
- return false;
+ return Category;
}
int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
@@ -2111,8 +2115,7 @@ int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
return false;
unsigned Latency = TSchedModel.computeInstrLatency(&I);
- if (!IsWMMAHazardInstInCategory(I, TII, Latency, Category))
- return false;
+ Category = getWMMAHazardInstInCategory(I, TII, Latency);
return hasWMMAToWMMARegOverlap(I, *MI);
};
@@ -2122,39 +2125,29 @@ int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) const {
return false;
unsigned Latency = TSchedModel.computeInstrLatency(&I);
- if (!IsWMMAHazardInstInCategory(I, TII, Latency, Category))
- return false;
+ Category = getWMMAHazardInstInCategory(I, TII, Latency);
return hasWMMAToVALURegOverlap(I, *MI);
};
- int Limit = 0;
-
auto GetWaitStatesFn = [](const MachineInstr &I) {
return SIInstrInfo::isVALU(I) ? 1 : 0;
};
int WaitStatesNeeded = -1;
+ int ExistingVALUs = 0; // Existing number of VALU ops in between.
+
+ // getWaitStatesSince checks for a hazard between instruction 'I' and 'MI':
+ // - If a hazard exists: returns the number of VALUs in between and sets
+ // 'Category' via IsWMMAHazardFn/IsVALUHazardFn for instruction 'I'.
+ // - If no hazard exists: returns INT_MAX, making WaitStatesNeeded negative,
+ // so no V_NOP insertion is needed.
if (TII->isXDLWMMA(*MI)) {
- for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
- Limit = WMMAWaitStates[Category]; // for IsExpiredFn.
- // 'getWaitStatesSince' returns the number of VALUs in between if hazard
- // exists, and INT_MAX if there is no hazard. As a result, a negative
- // WaitStatesNeeded here means no hazard, and we will continue to search
- // for other categories.
- WaitStatesNeeded =
- Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
- }
+ ExistingVALUs = getWaitStatesSince(IsWMMAHazardFn, 9, GetWaitStatesFn);
+ WaitStatesNeeded = WMMAWaitStates[Category] - ExistingVALUs;
} else { // Must be a co-executable VALU.
- for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
- Limit = VALUWaitStates[Category]; // for IsExpiredFn.
- // 'getWaitStatesSince' returns the number of VALUs in between if hazard
- // exists, and INT_MAX if there is no hazard. As a result, a negative
- // WaitStatesNeeded here means no hazard, and we will continue to search
- // for other categories.
- WaitStatesNeeded =
- Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn);
- }
+ ExistingVALUs = getWaitStatesSince(IsVALUHazardFn, 8, GetWaitStatesFn);
+ WaitStatesNeeded = VALUWaitStates[Category] - ExistingVALUs;
}
return WaitStatesNeeded;
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
shiltian
reviewed
Jun 1, 2026
rampitec
reviewed
Jun 1, 2026
| WaitStatesNeeded = | ||
| Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn); | ||
| } | ||
| ExistingVALUs = getWaitStatesSince(IsWMMAHazardFn, 9, GetWaitStatesFn); |
Contributor
There was a problem hiding this comment.
Define these 9 and 8 as const int with some meaningful name in the function please.
rampitec
approved these changes
Jun 1, 2026
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
No description provided.