Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/metrics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,7 @@ Complete list of metrics
Entropy
KLDivergence
JSDivergence
MatthewsCorrCoef
MaximumMeanDiscrepancy
HSIC
AveragePrecision
Expand Down
2 changes: 2 additions & 0 deletions ignite/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ignite.metrics.mean_average_precision import MeanAveragePrecision
from ignite.metrics.mean_pairwise_distance import MeanPairwiseDistance
from ignite.metrics.mean_squared_error import MeanSquaredError
from ignite.metrics.matthews_corrcoef import MatthewsCorrCoef
from ignite.metrics.metric import BatchFiltered, BatchWise, EpochWise, Metric, MetricUsage
from ignite.metrics.metric_group import MetricGroup
from ignite.metrics.metrics_lambda import MetricsLambda
Expand Down Expand Up @@ -56,6 +57,7 @@
"MeanAbsoluteError",
"MeanPairwiseDistance",
"MeanSquaredError",
"MatthewsCorrCoef",
"ConfusionMatrix",
"CosineSimilarity",
"ClassificationReport",
Expand Down
97 changes: 97 additions & 0 deletions ignite/metrics/matthews_corrcoef.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from typing import Callable, Union

import torch

from ignite.metrics.epoch_metric import EpochMetric


def matthews_corrcoef_compute_fn(y_preds: torch.Tensor, y_targets: torch.Tensor) -> float:
    """Collapse 2D score tensors to class indices, then delegate to scikit-learn's MCC."""
    from sklearn.metrics import matthews_corrcoef

    pred_dims, target_dims = y_preds.ndim, y_targets.ndim

    if pred_dims == 2 and target_dims == 2:
        # Both are (N, C): reduce each row to its argmax class index.
        y_preds, y_targets = y_preds.argmax(dim=1), y_targets.argmax(dim=1)
    elif pred_dims == 2 and target_dims == 1:
        # Scores (N, C) vs. label vector (N,): reduce predictions only.
        y_preds = y_preds.argmax(dim=1)
    elif pred_dims == 1 and target_dims == 2:
        raise ValueError(
            "Incoherent types between input y_pred and stored predictions: y_pred is 1D while y_target is 2D"
        )

    return matthews_corrcoef(y_targets.cpu().numpy(), y_preds.cpu().numpy())


class MatthewsCorrCoef(EpochMetric):
    """
    Compute the Matthews correlation coefficient (MCC).

    The Matthews correlation coefficient is used in machine learning as a measure of the
    quality of binary and multiclass classifications. It takes into account true and false
    positives and negatives and is generally regarded as a balanced measure which can be
    used even if the classes are of very different sizes. The MCC is in essence a
    correlation coefficient value between -1 and +1. A coefficient of +1 represents a
    perfect prediction, 0 an average random prediction and -1 an inverse prediction.

    This metric is suitable for both binary and multiclass classification. In the binary
    case, it is calculated using the entries of the confusion matrix, whereas for
    multiclass tasks, it is computed as a generalized correlation coefficient.

    In case of multiclass classification with shape (N, C) for y_pred and (N, C) for y,
    the predicted class is determined by the argmax of y_pred and y. In case of multiclass
    classification with shape (N, C) for y_pred and (N,) for y, the predicted class is
    determined by the argmax of y_pred and the true class is determined by the value in y.

    Args:
        output_transform: a callable that is used to transform the
            :class:`~ignite.engine.engine.Engine`'s ``process_function``'s output into the
            form expected by the metric. This can be useful if, for example, you have a multi-output model and
            you want to compute the metric with respect to one of the outputs.
            By default, this metric requires the output as ``(x, y)``.
        device: specifies which device updates are accumulated on. Setting the
            metric's device to be the same as your ``update`` arguments ensures the ``update`` method is
            non-blocking. By default, CPU.
        check_compute_fn: if True, compute_fn is run on the first batch of data to ensure there are no issues.
            If issues exist, user is warned that there might be an issue with the compute_fn. Default, False.
        skip_unrolling: specifies whether output should be unrolled before being fed to update method. Should be
            true for multi-output model, for example, if ``y_pred`` contains multi-output as ``(y_pred_a, y_pred_b)``
            Alternatively, ``output_transform`` can be used to handle this.

    Examples:

        .. include:: defaults.rst
            :start-after: :orphan:


        .. testcode::

            y_pred = torch.tensor([+1, +1, +1, -1])
            y_true = torch.tensor([+1, -1, +1, +1])

            matthews_corrcoef = MatthewsCorrCoef()
            matthews_corrcoef.attach(default_evaluator, 'mcc')
            state = default_evaluator.run([[y_pred, y_true]])
            print(state.metrics['mcc'])

        .. testoutput::

            -0.33...

    .. versionadded:: 0.6.0

    """

    def __init__(
        self,
        output_transform: Callable = lambda x: x,
        check_compute_fn: bool = False,
        device: Union[str, torch.device] = torch.device("cpu"),
        skip_unrolling: bool = False,
    ) -> None:
        # Fail fast with an actionable message when the optional dependency is absent;
        # chain the original ImportError so the root cause stays visible.
        try:
            from sklearn.metrics import matthews_corrcoef  # noqa: F401
        except ImportError as err:
            raise ModuleNotFoundError("This metric module requires scikit-learn to be installed.") from err

        super().__init__(
            matthews_corrcoef_compute_fn,
            output_transform=output_transform,
            check_compute_fn=check_compute_fn,
            device=device,
            skip_unrolling=skip_unrolling,
        )
143 changes: 143 additions & 0 deletions tests/ignite/metrics/test_matthews_corrcoef.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
from unittest.mock import patch

import pytest
import sklearn
import torch
from sklearn.metrics import matthews_corrcoef

from ignite.engine import Engine
from ignite.exceptions import NotComputableError
from ignite.metrics import MatthewsCorrCoef

# Fix the RNG seed at import time so the randomly generated fixture data is reproducible.
torch.manual_seed(12)


@pytest.fixture()
def mock_no_sklearn():
    """Temporarily blank out ``sklearn.metrics`` so importing it fails inside the metric."""
    patched_modules = {"sklearn.metrics": None}
    with patch.dict("sys.modules", patched_modules):
        yield sklearn


def test_no_sklearn(mock_no_sklearn):
    """Constructing the metric without scikit-learn must raise ModuleNotFoundError."""
    expected_message = r"This metric module requires scikit-learn to be installed."
    with pytest.raises(ModuleNotFoundError, match=expected_message):
        MatthewsCorrCoef()


def test_no_update():
    """Calling compute() before any update() must raise NotComputableError."""
    metric = MatthewsCorrCoef()
    expected_message = r"EpochMetric must have at least one example before it can be computed"
    with pytest.raises(NotComputableError, match=expected_message):
        metric.compute()


def test_input_types():
    """Updates whose dtype/shape disagree with the stored history must be rejected."""
    metric = MatthewsCorrCoef()
    metric.reset()
    # Seed the metric with float predictions and long targets of shape (4, 3).
    metric.update((torch.rand(4, 3), torch.randint(0, 2, size=(4, 3), dtype=torch.long)))

    # Integer predictions conflict with the stored float predictions.
    with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"):
        metric.update((torch.randint(0, 5, size=(4, 3)), torch.randint(0, 2, size=(4, 3))))

    # int32 targets conflict with the stored int64 targets.
    with pytest.raises(ValueError, match=r"Incoherent types between input y and stored targets"):
        metric.update((torch.rand(4, 3), torch.randint(0, 2, size=(4, 3)).to(torch.int32)))

    # 1D predictions conflict with the stored 2D predictions.
    with pytest.raises(ValueError, match=r"Incoherent types between input y_pred and stored predictions"):
        metric.update((torch.randint(0, 2, size=(10,)).long(), torch.randint(0, 2, size=(10, 5)).long()))


def test_check_shape():
    """_check_shape must reject 0D and 3D predictions/targets."""
    metric = MatthewsCorrCoef()

    bad_inputs = [
        ((torch.tensor(0), torch.tensor(0)), r"Predictions should be of shape"),
        ((torch.rand(4, 3, 1), torch.rand(4, 3)), r"Predictions should be of shape"),
        ((torch.rand(4, 3), torch.rand(4, 3, 1)), r"Targets should be of shape"),
    ]
    for pair, expected_message in bad_inputs:
        with pytest.raises(ValueError, match=expected_message):
            metric._check_shape(pair)


@pytest.fixture(params=range(4))
def test_data_binary(request):
    """(y_pred, y, batch_size) cases: binary labels, shapes (N,) and (N, 1), whole and batched."""
    # The full list is built before indexing so the RNG draw order is the same for every param.
    cases = [
        (torch.randint(0, 2, size=(10,)).long(), torch.randint(0, 2, size=(10,)).long(), 1),
        (torch.randint(0, 2, size=(10, 1)).long(), torch.randint(0, 2, size=(10, 1)).long(), 1),
        (torch.randint(0, 2, size=(50,)).long(), torch.randint(0, 2, size=(50,)).long(), 16),
        (torch.randint(0, 2, size=(50, 1)).long(), torch.randint(0, 2, size=(50, 1)).long(), 16),
    ]
    return cases[request.param]


@pytest.mark.parametrize("n_times", range(2))
def test_binary_input(n_times, test_data_binary, available_device):
    """MCC over binary labels must match scikit-learn, whole-batch and mini-batched."""
    y_pred, y, batch_size = test_data_binary
    metric = MatthewsCorrCoef(device=available_device)
    assert metric._device == torch.device(available_device)

    metric.reset()
    if batch_size > 1:
        # Feed the data in slices of batch_size; the final short slice is handled by slicing.
        n_iters = y.shape[0] // batch_size + 1
        for start in range(0, n_iters * batch_size, batch_size):
            metric.update((y_pred[start : start + batch_size], y[start : start + batch_size]))
    else:
        metric.update((y_pred, y))

    expected = matthews_corrcoef(y.numpy().ravel(), y_pred.numpy().ravel())
    result = metric.compute()
    assert isinstance(result, float)
    assert expected == pytest.approx(result)


@pytest.fixture(params=range(2))
def test_data_multiclass(request):
    """(y_pred, y, batch_size) cases: labels in [0, 5), whole and batched updates."""
    # The full list is built before indexing so the RNG draw order is the same for every param.
    cases = [
        (torch.randint(0, 5, size=(10,)).long(), torch.randint(0, 5, size=(10,)).long(), 1),
        (torch.randint(0, 5, size=(50,)).long(), torch.randint(0, 5, size=(50,)).long(), 16),
    ]
    return cases[request.param]


@pytest.mark.parametrize("n_times", range(2))
def test_multiclass_input(n_times, test_data_multiclass, available_device):
    """MCC over multiclass labels must match scikit-learn, whole-batch and mini-batched."""
    y_pred, y, batch_size = test_data_multiclass
    metric = MatthewsCorrCoef(device=available_device)
    assert metric._device == torch.device(available_device)

    metric.reset()
    if batch_size > 1:
        # Feed the data in slices of batch_size; the final short slice is handled by slicing.
        n_iters = y.shape[0] // batch_size + 1
        for start in range(0, n_iters * batch_size, batch_size):
            metric.update((y_pred[start : start + batch_size], y[start : start + batch_size]))
    else:
        metric.update((y_pred, y))

    expected = matthews_corrcoef(y.numpy().ravel(), y_pred.numpy().ravel())
    result = metric.compute()
    assert isinstance(result, float)
    assert expected == pytest.approx(result)


def test_integration(available_device):
    """MCC attached to an Engine must agree with scikit-learn on a fixed batch."""
    y_pred = torch.tensor([1, 0, 1, 1])
    y_true = torch.tensor([1, 1, 0, 1])

    def process_fn(engine, batch):
        return y_pred, y_true

    evaluator = Engine(process_fn)
    MatthewsCorrCoef(device=available_device).attach(evaluator, "mcc")

    state = evaluator.run([None], max_epochs=1)

    expected = matthews_corrcoef(y_true.numpy(), y_pred.numpy())
    assert state.metrics["mcc"] == pytest.approx(expected)
Loading