Skip to content

Commit d916b1b

Browse files
Keith Busch authored and Christoph Hellwig committed
nvme-pci: use host managed power state for suspend
The nvme pci driver prepares its devices for power loss during suspend by shutting down the controllers. The power setting is deferred to pci driver's power management before the platform removes power. The suspend-to-idle mode, however, does not remove power. NVMe devices that implement host managed power settings can achieve lower power and better transition latencies than using generic PCI power settings. Try to use this feature if the platform is not involved with the suspend. If successful, restore the previous power state on resume. Tested-by: Kai-Heng Feng <[email protected]> Tested-by: Mario Limonciello <[email protected]> Reviewed-by: Rafael J. Wysocki <[email protected]> Signed-off-by: Keith Busch <[email protected]> Signed-off-by: Sagi Grimberg <[email protected]> [hch: fixed the compilation for the !CONFIG_PM_SLEEP case] Signed-off-by: Christoph Hellwig <[email protected]>
1 parent 7a1f46e commit d916b1b

File tree

1 file changed

+92
-3
lines changed

1 file changed

+92
-3
lines changed

drivers/nvme/host/pci.c

Lines changed: 92 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <linux/mutex.h>
1919
#include <linux/once.h>
2020
#include <linux/pci.h>
21+
#include <linux/suspend.h>
2122
#include <linux/t10-pi.h>
2223
#include <linux/types.h>
2324
#include <linux/io-64-nonatomic-lo-hi.h>
@@ -116,6 +117,7 @@ struct nvme_dev {
116117
u32 cmbsz;
117118
u32 cmbloc;
118119
struct nvme_ctrl ctrl;
120+
u32 last_ps;
119121

120122
mempool_t *iod_mempool;
121123

@@ -2835,26 +2837,111 @@ static void nvme_remove(struct pci_dev *pdev)
28352837
}
28362838

28372839
#ifdef CONFIG_PM_SLEEP
2840+
/*
 * Issue a Get Features request for NVME_FEAT_POWER_MGMT on @ctrl, passing
 * @ps as the result pointer. Presumably the controller's current power
 * state is written to *@ps on success -- confirm against
 * nvme_get_features().
 *
 * Returns whatever nvme_get_features() returns. The caller in this file
 * treats a negative value as an error to propagate.
 */
static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
2841+
{
2842+
return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
2843+
}
2844+
2845+
/*
 * Issue a Set Features request for NVME_FEAT_POWER_MGMT on @ctrl with @ps
 * as the feature value, i.e. ask the controller to enter power state @ps.
 *
 * Returns whatever nvme_set_features() returns. Callers in this file treat
 * a negative value as an error and any other non-zero value as "the
 * request did not succeed".
 */
static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
2846+
{
2847+
return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
2848+
}
2849+
2850+
/*
 * Resume callback paired with nvme_suspend().
 *
 * Tries to put the controller back into the power state recorded in
 * ndev->last_ps. The controller is reset instead when any of these hold:
 *  - pm_resume_via_firmware() is true,
 *  - ctrl->npss is zero (nvme_suspend() also clears it to force this path),
 *  - restoring the saved power state returns non-zero.
 *
 * Always returns 0; the return value of nvme_reset_ctrl() is not checked.
 */
static int nvme_resume(struct device *dev)
2851+
{
2852+
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
2853+
struct nvme_ctrl *ctrl = &ndev->ctrl;
2854+
2855+
if (pm_resume_via_firmware() || !ctrl->npss ||
2856+
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
2857+
nvme_reset_ctrl(ctrl);
2858+
return 0;
2859+
}
2860+
28382861
/*
 * Suspend callback that prefers an NVMe host managed power state over a
 * full controller shutdown.
 *
 * Returns 0 when the device was either fully disabled or moved to power
 * state ctrl->npss, -EBUSY when the controller is neither LIVE nor
 * ADMIN_ONLY after the queues are frozen, or the negative value returned
 * by nvme_get_power_state()/nvme_set_power_state().
 */
static int nvme_suspend(struct device *dev)
28392862
{
28402863
struct pci_dev *pdev = to_pci_dev(dev);
28412864
struct nvme_dev *ndev = pci_get_drvdata(pdev);
2865+
struct nvme_ctrl *ctrl = &ndev->ctrl;
2866+
/* Default result for the early "controller not usable" exit below. */
int ret = -EBUSY;
2867+
2868+
/*
2869+
* The platform does not remove power for a kernel managed suspend so
2870+
* use host managed nvme power settings for lowest idle power if
2871+
* possible. This should have quicker resume latency than a full device
2872+
* shutdown. But if the firmware is involved after the suspend or the
2873+
* device does not support any non-default power states, shut down the
2874+
* device fully.
2875+
*/
2876+
if (pm_suspend_via_firmware() || !ctrl->npss) {
2877+
nvme_dev_disable(ndev, true);
2878+
return 0;
2879+
}
2880+
2881+
/* Freeze and sync the queues before issuing the power state commands. */
nvme_start_freeze(ctrl);
2882+
nvme_wait_freeze(ctrl);
2883+
nvme_sync_queues(ctrl);
2884+
2885+
if (ctrl->state != NVME_CTRL_LIVE &&
2886+
ctrl->state != NVME_CTRL_ADMIN_ONLY)
2887+
goto unfreeze;
2888+
2889+
/* Remember the current power state so nvme_resume() can restore it. */
ndev->last_ps = 0;
2890+
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
2891+
if (ret < 0)
2892+
goto unfreeze;
2893+
2894+
/* Request power state number npss; presumably the deepest one -- confirm. */
ret = nvme_set_power_state(ctrl, ctrl->npss);
2895+
if (ret < 0)
2896+
goto unfreeze;
2897+
2898+
/* Positive result: the request failed, so fall back to a full shutdown. */
if (ret) {
2899+
/*
2900+
* Clearing npss forces a controller reset on resume. The
2901+
* correct value will be rediscovered then.
2902+
*/
2903+
nvme_dev_disable(ndev, true);
2904+
ctrl->npss = 0;
2905+
ret = 0;
2906+
goto unfreeze;
2907+
}
2908+
/*
2909+
* A saved state prevents pci pm from generically controlling the
2910+
* device's power. If we're using protocol specific settings, we don't
2911+
* want pci interfering.
2912+
*/
2913+
pci_save_state(pdev);
2914+
unfreeze:
2915+
nvme_unfreeze(ctrl);
2916+
return ret;
2917+
}
2918+
2919+
/*
 * Unconditional suspend path: disable the device via
 * nvme_dev_disable(ndev, true) without attempting any NVMe power state
 * change. Wired to the .freeze and .poweroff callbacks of nvme_dev_pm_ops.
 *
 * Always returns 0.
 */
static int nvme_simple_suspend(struct device *dev)
2920+
{
2921+
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
28422922

28432923
nvme_dev_disable(ndev, true);
28442924
return 0;
28452925
}
28462926

2847-
static int nvme_resume(struct device *dev)
2927+
/*
 * Unconditional resume path: request a controller reset via
 * nvme_reset_ctrl(). Wired to the .thaw and .restore callbacks of
 * nvme_dev_pm_ops.
 *
 * Always returns 0; the return value of nvme_reset_ctrl() is not checked.
 */
static int nvme_simple_resume(struct device *dev)
28482928
{
28492929
struct pci_dev *pdev = to_pci_dev(dev);
28502930
struct nvme_dev *ndev = pci_get_drvdata(pdev);
28512931

28522932
nvme_reset_ctrl(&ndev->ctrl);
28532933
return 0;
28542934
}
2855-
#endif
28562935

2857-
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
2936+
/*
 * Power management callbacks for the driver. Only .suspend/.resume use the
 * handlers that try an NVMe host managed power state; .freeze/.thaw and
 * .poweroff/.restore always use the simple disable/reset handlers.
 *
 * NOTE(review): this object is not declared static although the visible
 * reference (the driver's .pm pointer) is in this file -- confirm whether
 * external linkage is intended.
 */
const struct dev_pm_ops nvme_dev_pm_ops = {
2937+
.suspend = nvme_suspend,
2938+
.resume = nvme_resume,
2939+
.freeze = nvme_simple_suspend,
2940+
.thaw = nvme_simple_resume,
2941+
.poweroff = nvme_simple_suspend,
2942+
.restore = nvme_simple_resume,
2943+
};
2944+
#endif /* CONFIG_PM_SLEEP */
28582945

28592946
static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
28602947
pci_channel_state_t state)
@@ -2959,9 +3046,11 @@ static struct pci_driver nvme_driver = {
29593046
.probe = nvme_probe,
29603047
.remove = nvme_remove,
29613048
.shutdown = nvme_shutdown,
3049+
#ifdef CONFIG_PM_SLEEP
29623050
.driver = {
29633051
.pm = &nvme_dev_pm_ops,
29643052
},
3053+
#endif
29653054
.sriov_configure = pci_sriov_configure_simple,
29663055
.err_handler = &nvme_err_handler,
29673056
};

0 commit comments

Comments
 (0)