asedeno.scripts.mit.edu Git - linux.git/blobdiff - drivers/nvme/host/core.c
nvme: fix regression upon hot device removal and insertion
[linux.git] / drivers / nvme / host / core.c
index 1b7c2afd84cbb5f4971cb2ef8fba6a48a518da4c..3077cd4d75bfb829990123962d5deb1d7b32b1c2 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/hdreg.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/backing-dev.h>
 #include <linux/list_sort.h>
 #include <linux/slab.h>
 #include <linux/types.h>
@@ -1113,15 +1114,15 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
        return id;
 }
 
-static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
-                     void *buffer, size_t buflen, u32 *result)
+static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
+               unsigned int dword11, void *buffer, size_t buflen, u32 *result)
 {
        struct nvme_command c;
        union nvme_result res;
        int ret;
 
        memset(&c, 0, sizeof(c));
-       c.features.opcode = nvme_admin_set_features;
+       c.features.opcode = op;
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);
 
@@ -1132,6 +1133,24 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword
        return ret;
 }
 
+int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
+                     unsigned int dword11, void *buffer, size_t buflen,
+                     u32 *result)
+{
+       return nvme_features(dev, nvme_admin_set_features, fid, dword11, buffer,
+                            buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_set_features);
+
+int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
+                     unsigned int dword11, void *buffer, size_t buflen,
+                     u32 *result)
+{
+       return nvme_features(dev, nvme_admin_get_features, fid, dword11, buffer,
+                            buflen, result);
+}
+EXPORT_SYMBOL_GPL(nvme_get_features);
+
 int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 {
        u32 q_count = (*count - 1) | ((*count - 1) << 16);
@@ -1608,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 {
        sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
        unsigned short bs = 1 << ns->lba_shift;
+       u32 atomic_bs, phys_bs, io_opt;
 
        if (ns->lba_shift > PAGE_SHIFT) {
                /* unsupported block size, set capacity to 0 later */
@@ -1616,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
        blk_mq_freeze_queue(disk->queue);
        blk_integrity_unregister(disk);
 
+       if (id->nabo == 0) {
+               /*
+                * Bit 1 indicates whether NAWUPF is defined for this namespace
+                * and whether it should be used instead of AWUPF. If NAWUPF ==
+                * 0 then AWUPF must be used instead.
+                */
+               if (id->nsfeat & (1 << 1) && id->nawupf)
+                       atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
+               else
+                       atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
+       } else {
+               atomic_bs = bs;
+       }
+       phys_bs = bs;
+       io_opt = bs;
+       if (id->nsfeat & (1 << 4)) {
+               /* NPWG = Namespace Preferred Write Granularity */
+               phys_bs *= 1 + le16_to_cpu(id->npwg);
+               /* NOWS = Namespace Optimal Write Size */
+               io_opt *= 1 + le16_to_cpu(id->nows);
+       }
+
        blk_queue_logical_block_size(disk->queue, bs);
-       blk_queue_physical_block_size(disk->queue, bs);
-       blk_queue_io_min(disk->queue, bs);
+       /*
+        * Linux filesystems assume writing a single physical block is
+        * an atomic operation. Hence limit the physical block size to the
+        * value of the Atomic Write Unit Power Fail parameter.
+        */
+       blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
+       blk_queue_io_min(disk->queue, phys_bs);
+       blk_queue_io_opt(disk->queue, io_opt);
 
        if (ns->ms && !ns->ext &&
            (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
@@ -2368,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
        lockdep_assert_held(&nvme_subsystems_lock);
 
        list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
-               if (ctrl->state == NVME_CTRL_DELETING ||
-                   ctrl->state == NVME_CTRL_DEAD)
+               if (tmp->state == NVME_CTRL_DELETING ||
+                   tmp->state == NVME_CTRL_DEAD)
                        continue;
 
                if (tmp->cntlid == ctrl->cntlid) {
@@ -2415,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
        memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
        subsys->vendor_id = le16_to_cpu(id->vid);
        subsys->cmic = id->cmic;
+       subsys->awupf = le16_to_cpu(id->awupf);
 #ifdef CONFIG_NVME_MULTIPATH
        subsys->iopolicy = NVME_IOPOLICY_NUMA;
 #endif
@@ -3256,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
                goto out_free_ns;
        }
 
+       if (ctrl->opts->data_digest)
+               ns->queue->backing_dev_info->capabilities
+                       |= BDI_CAP_STABLE_WRITES;
+
        blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
        if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
                blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
@@ -3318,7 +3371,7 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
 
        nvme_mpath_add_disk(ns, id);
-       nvme_fault_inject_init(ns);
+       nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
        kfree(id);
 
        return 0;
@@ -3343,7 +3396,15 @@ static void nvme_ns_remove(struct nvme_ns *ns)
        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;
 
-       nvme_fault_inject_fini(ns);
+       nvme_fault_inject_fini(&ns->fault_inject);
+
+       mutex_lock(&ns->ctrl->subsys->lock);
+       list_del_rcu(&ns->siblings);
+       mutex_unlock(&ns->ctrl->subsys->lock);
+       synchronize_rcu(); /* guarantee not available in head->list */
+       nvme_mpath_clear_current_path(ns);
+       synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
+
        if (ns->disk && ns->disk->flags & GENHD_FL_UP) {
                del_gendisk(ns->disk);
                blk_cleanup_queue(ns->queue);
@@ -3351,16 +3412,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
                        blk_integrity_unregister(ns->disk);
        }
 
-       mutex_lock(&ns->ctrl->subsys->lock);
-       list_del_rcu(&ns->siblings);
-       nvme_mpath_clear_current_path(ns);
-       mutex_unlock(&ns->ctrl->subsys->lock);
-
        down_write(&ns->ctrl->namespaces_rwsem);
        list_del_init(&ns->list);
        up_write(&ns->ctrl->namespaces_rwsem);
 
-       synchronize_srcu(&ns->head->srcu);
        nvme_mpath_check_last_path(ns);
        nvme_put_ns(ns);
 }
@@ -3400,7 +3455,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
 {
        struct nvme_ns *ns;
        __le32 *ns_list;
-       unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+       unsigned i, j, nsid, prev = 0;
+       unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
        int ret = 0;
 
        ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
@@ -3701,6 +3757,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
+       nvme_fault_inject_fini(&ctrl->fault_inject);
        dev_pm_qos_hide_latency_tolerance(ctrl->device);
        cdev_device_del(&ctrl->cdev, ctrl->device);
 }
@@ -3796,6 +3853,8 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        dev_pm_qos_update_user_latency_tolerance(ctrl->device,
                min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
+       nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+
        return 0;
 out_free_name:
        kfree_const(ctrl->device->kobj.name);