asedeno.scripts.mit.edu Git - linux.git/blobdiff - drivers/nvme/host/core.c
nvme: check admin passthru command effects
[linux.git] / drivers / nvme / host / core.c
index acc816b67582f30524ad19f66843b071dfcef6ae..65fd2fc1ae3c20878fdbe5911be7ac7bee18801f 100644 (file)
 
 #define NVME_MINORS            (1U << MINORBITS)
 
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
+unsigned int admin_timeout = 60;
+module_param(admin_timeout, uint, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
 EXPORT_SYMBOL_GPL(admin_timeout);
 
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+unsigned int nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 EXPORT_SYMBOL_GPL(nvme_io_timeout);
 
@@ -52,9 +52,6 @@ static u8 nvme_max_retries = 5;
 module_param_named(max_retries, nvme_max_retries, byte, 0644);
 MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 
-static int nvme_char_major;
-module_param(nvme_char_major, int, 0);
-
 static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
@@ -71,11 +68,13 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
 struct workqueue_struct *nvme_wq;
 EXPORT_SYMBOL_GPL(nvme_wq);
 
-static LIST_HEAD(nvme_ctrl_list);
-static DEFINE_SPINLOCK(dev_list_lock);
-
+static DEFINE_IDA(nvme_instance_ida);
+static dev_t nvme_chr_devt;
 static struct class *nvme_class;
 
+static void nvme_ns_remove(struct nvme_ns *ns);
+static int nvme_revalidate_disk(struct gendisk *disk);
+
 static __le32 nvme_get_log_dw10(u8 lid, size_t size)
 {
        return cpu_to_le32((((size / 4) - 1) << 16) | lid);
@@ -101,6 +100,51 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
        return ret;
 }
 
+static void nvme_delete_ctrl_work(struct work_struct *work)
+{
+       struct nvme_ctrl *ctrl =
+               container_of(work, struct nvme_ctrl, delete_work);
+
+       flush_work(&ctrl->reset_work);
+       nvme_stop_ctrl(ctrl);
+       nvme_remove_namespaces(ctrl);
+       ctrl->ops->delete_ctrl(ctrl);
+       nvme_uninit_ctrl(ctrl);
+       nvme_put_ctrl(ctrl);
+}
+
+int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
+{
+       if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
+               return -EBUSY;
+       if (!queue_work(nvme_wq, &ctrl->delete_work))
+               return -EBUSY;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
+
+int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+       int ret = 0;
+
+       /*
+        * Keep a reference until the work is flushed since ->delete_ctrl
+        * can free the controller.
+        */
+       nvme_get_ctrl(ctrl);
+       ret = nvme_delete_ctrl(ctrl);
+       if (!ret)
+               flush_work(&ctrl->delete_work);
+       nvme_put_ctrl(ctrl);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync);
+
+static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+{
+       return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+}
+
 static blk_status_t nvme_error_status(struct request *req)
 {
        switch (nvme_req(req)->status & 0x7ff) {
@@ -134,10 +178,10 @@ static inline bool nvme_req_needs_retry(struct request *req)
                return false;
        if (nvme_req(req)->status & NVME_SC_DNR)
                return false;
-       if (jiffies - req->start_time >= req->timeout)
-               return false;
        if (nvme_req(req)->retries >= nvme_max_retries)
                return false;
+       if (blk_queue_dying(req->q))
+               return false;
        return true;
 }
 
@@ -155,18 +199,13 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq);
 
 void nvme_cancel_request(struct request *req, void *data, bool reserved)
 {
-       int status;
-
        if (!blk_mq_request_started(req))
                return;
 
        dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
                                "Cancelling I/O %d", req->tag);
 
-       status = NVME_SC_ABORT_REQ;
-       if (blk_queue_dying(req->q))
-               status |= NVME_SC_DNR;
-       nvme_req(req)->status = status;
+       nvme_req(req)->status = NVME_SC_ABORT_REQ;
        blk_mq_complete_request(req);
 
 }
@@ -207,6 +246,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        case NVME_CTRL_RECONNECTING:
                switch (old_state) {
                case NVME_CTRL_LIVE:
+               case NVME_CTRL_RESETTING:
                        changed = true;
                        /* FALLTHRU */
                default:
@@ -253,12 +293,6 @@ static void nvme_free_ns(struct kref *kref)
        if (ns->ndev)
                nvme_nvm_unregister(ns);
 
-       if (ns->disk) {
-               spin_lock(&dev_list_lock);
-               ns->disk->private_data = NULL;
-               spin_unlock(&dev_list_lock);
-       }
-
        put_disk(ns->disk);
        ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
        nvme_put_ctrl(ns->ctrl);
@@ -270,29 +304,6 @@ static void nvme_put_ns(struct nvme_ns *ns)
        kref_put(&ns->kref, nvme_free_ns);
 }
 
-static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
-{
-       struct nvme_ns *ns;
-
-       spin_lock(&dev_list_lock);
-       ns = disk->private_data;
-       if (ns) {
-               if (!kref_get_unless_zero(&ns->kref))
-                       goto fail;
-               if (!try_module_get(ns->ctrl->ops->module))
-                       goto fail_put_ns;
-       }
-       spin_unlock(&dev_list_lock);
-
-       return ns;
-
-fail_put_ns:
-       kref_put(&ns->kref, nvme_free_ns);
-fail:
-       spin_unlock(&dev_list_lock);
-       return NULL;
-}
-
 struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags, int qid)
 {
@@ -469,16 +480,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
        u16 control = 0;
        u32 dsmgmt = 0;
 
-       /*
-        * If formated with metadata, require the block layer provide a buffer
-        * unless this namespace is formated such that the metadata can be
-        * stripped/generated by the controller with PRACT=1.
-        */
-       if (ns && ns->ms &&
-           (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
-           !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
-               return BLK_STS_NOTSUPP;
-
        if (req->cmd_flags & REQ_FUA)
                control |= NVME_RW_FUA;
        if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -497,6 +498,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
 
        if (ns->ms) {
+               /*
+                * If formatted with metadata, the block layer always provides a
+                * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
+                * we enable the PRACT bit for protection information or set the
+                * namespace capacity to zero to prevent any I/O.
+                */
+               if (!blk_integrity_rq(req)) {
+                       if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
+                               return BLK_STS_NOTSUPP;
+                       control |= NVME_RW_PRINFO_PRACT;
+               }
+
                switch (ns->pi_type) {
                case NVME_NS_DPS_PI_TYPE3:
                        control |= NVME_RW_PRINFO_PRCHK_GUARD;
@@ -509,8 +522,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
                                        nvme_block_nr(ns, blk_rq_pos(req)));
                        break;
                }
-               if (!blk_integrity_rq(req))
-                       control |= NVME_RW_PRINFO_PRACT;
        }
 
        cmnd->rw.control = cpu_to_le16(control);
@@ -984,12 +995,87 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
                        metadata, meta_len, io.slba, NULL, 0);
 }
 
+static u32 nvme_known_admin_effects(u8 opcode)
+{
+       switch (opcode) {
+       case nvme_admin_format_nvm:
+               return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+                                       NVME_CMD_EFFECTS_CSE_MASK;
+       case nvme_admin_sanitize_nvm:
+               return NVME_CMD_EFFECTS_CSE_MASK;
+       default:
+               break;
+       }
+       return 0;
+}
+
+static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+                                                               u8 opcode)
+{
+       u32 effects = 0;
+
+       if (ns) {
+               if (ctrl->effects)
+                       effects = le32_to_cpu(ctrl->effects->iocs[opcode]);
+               if (effects & ~NVME_CMD_EFFECTS_CSUPP)
+                       dev_warn(ctrl->device,
+                                "IO command:%02x has unhandled effects:%08x\n",
+                                opcode, effects);
+               return 0;
+       }
+       /* No namespace: admin command, so consult acs[] rather than iocs[]. */
+       if (ctrl->effects)
+               effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+       else
+               effects = nvme_known_admin_effects(opcode);
+
+       /*
+        * For simplicity, IO to all namespaces is quiesced even if the command
+        * effects say only one namespace is affected.
+        */
+       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+               nvme_start_freeze(ctrl);
+               nvme_wait_freeze(ctrl);
+       }
+       return effects;
+}
+
+static void nvme_update_formats(struct nvme_ctrl *ctrl)
+{
+       struct nvme_ns *ns;
+
+       mutex_lock(&ctrl->namespaces_mutex);
+       list_for_each_entry(ns, &ctrl->namespaces, list) {
+               if (ns->disk && nvme_revalidate_disk(ns->disk))
+                       nvme_ns_remove(ns);
+       }
+       mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+{
+       /*
+        * Revalidate LBA changes prior to unfreezing. This is necessary to
+        * prevent memory corruption if a logical block size was changed by
+        * this command.
+        */
+       if (effects & NVME_CMD_EFFECTS_LBCC)
+               nvme_update_formats(ctrl);
+       if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK))
+               nvme_unfreeze(ctrl);
+       if (effects & NVME_CMD_EFFECTS_CCC)
+               nvme_init_identify(ctrl);
+       if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
+               nvme_queue_scan(ctrl);
+}
+
 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd __user *ucmd)
 {
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
+       u32 effects;
        int status;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -1015,10 +1101,13 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);
 
+       effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        (void __user *)(uintptr_t)cmd.metadata, cmd.metadata,
                        0, &cmd.result, timeout);
+       nvme_passthru_end(ctrl, effects);
+
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
@@ -1054,27 +1143,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
        }
 }
 
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
-                       unsigned int cmd, unsigned long arg)
-{
-       return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl      NULL
-#endif
-
 static int nvme_open(struct block_device *bdev, fmode_t mode)
 {
-       return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+       struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+       if (!kref_get_unless_zero(&ns->kref))
+               return -ENXIO;
+       return 0;
 }
 
 static void nvme_release(struct gendisk *disk, fmode_t mode)
 {
-       struct nvme_ns *ns = disk->private_data;
-
-       module_put(ns->ctrl->ops->module);
-       nvme_put_ns(ns);
+       nvme_put_ns(disk->private_data);
 }
 
 static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1087,35 +1167,12 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
-               u16 bs)
-{
-       struct nvme_ns *ns = disk->private_data;
-       u16 old_ms = ns->ms;
-       u8 pi_type = 0;
-
-       ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
-       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-
-       /* PI implementation requires metadata equal t10 pi tuple size */
-       if (ns->ms == sizeof(struct t10_pi_tuple))
-               pi_type = id->dps & NVME_NS_DPS_PI_MASK;
-
-       if (blk_get_integrity(disk) &&
-           (ns->pi_type != pi_type || ns->ms != old_ms ||
-            bs != queue_logical_block_size(disk->queue) ||
-            (ns->ms && ns->ext)))
-               blk_integrity_unregister(disk);
-
-       ns->pi_type = pi_type;
-}
-
-static void nvme_init_integrity(struct nvme_ns *ns)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
 {
        struct blk_integrity integrity;
 
        memset(&integrity, 0, sizeof(integrity));
-       switch (ns->pi_type) {
+       switch (pi_type) {
        case NVME_NS_DPS_PI_TYPE3:
                integrity.profile = &t10_pi_type3_crc;
                integrity.tag_size = sizeof(u16) + sizeof(u32);
@@ -1131,16 +1188,12 @@ static void nvme_init_integrity(struct nvme_ns *ns)
                integrity.profile = NULL;
                break;
        }
-       integrity.tuple_size = ns->ms;
-       blk_integrity_register(ns->disk, &integrity);
-       blk_queue_max_integrity_segments(ns->queue, 1);
+       integrity.tuple_size = ms;
+       blk_integrity_register(disk, &integrity);
+       blk_queue_max_integrity_segments(disk->queue, 1);
 }
 #else
-static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
-               u16 bs)
-{
-}
-static void nvme_init_integrity(struct nvme_ns *ns)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1151,29 +1204,26 @@ static void nvme_set_chunk_size(struct nvme_ns *ns)
        blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
 }
 
-static void nvme_config_discard(struct nvme_ns *ns)
+static void nvme_config_discard(struct nvme_ctrl *ctrl,
+               unsigned stream_alignment, struct request_queue *queue)
 {
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       u32 logical_block_size = queue_logical_block_size(ns->queue);
+       u32 size = queue_logical_block_size(queue);
+
+       if (stream_alignment)
+               size *= stream_alignment;
 
        BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
                        NVME_DSM_MAX_RANGES);
 
-       if (ctrl->nr_streams && ns->sws && ns->sgs) {
-               unsigned int sz = logical_block_size * ns->sws * ns->sgs;
+       queue->limits.discard_alignment = size;
+       queue->limits.discard_granularity = size;
 
-               ns->queue->limits.discard_alignment = sz;
-               ns->queue->limits.discard_granularity = sz;
-       } else {
-               ns->queue->limits.discard_alignment = logical_block_size;
-               ns->queue->limits.discard_granularity = logical_block_size;
-       }
-       blk_queue_max_discard_sectors(ns->queue, UINT_MAX);
-       blk_queue_max_discard_segments(ns->queue, NVME_DSM_MAX_RANGES);
-       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
+       blk_queue_max_discard_sectors(queue, UINT_MAX);
+       blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
+       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, queue);
 
        if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
-               blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX);
+               blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
 }
 
 static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
@@ -1193,11 +1243,34 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
        }
 }
 
+static void nvme_update_disk_info(struct gendisk *disk,
+               struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+       sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+       unsigned stream_alignment = 0;
+
+       if (ns->ctrl->nr_streams && ns->sws && ns->sgs)
+               stream_alignment = ns->sws * ns->sgs;
+
+       blk_mq_freeze_queue(disk->queue);
+       blk_integrity_unregister(disk);
+
+       blk_queue_logical_block_size(disk->queue, 1 << ns->lba_shift);
+       if (ns->ms && !ns->ext &&
+           (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+               nvme_init_integrity(disk, ns->ms, ns->pi_type);
+       if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
+               capacity = 0;
+       set_capacity(disk, capacity);
+
+       if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+               nvme_config_discard(ns->ctrl, stream_alignment, disk->queue);
+       blk_mq_unfreeze_queue(disk->queue);
+}
+
 static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
        struct nvme_ns *ns = disk->private_data;
-       struct nvme_ctrl *ctrl = ns->ctrl;
-       u16 bs;
 
        /*
         * If identify namespace failed, use default 512 byte block size so
@@ -1206,26 +1279,18 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
        ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
-       bs = 1 << ns->lba_shift;
        ns->noiob = le16_to_cpu(id->noiob);
+       ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+       ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
+       /* the PI implementation requires metadata equal t10 pi tuple size */
+       if (ns->ms == sizeof(struct t10_pi_tuple))
+               ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
+       else
+               ns->pi_type = 0;
 
-       blk_mq_freeze_queue(disk->queue);
-
-       if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
-               nvme_prep_integrity(disk, id, bs);
-       blk_queue_logical_block_size(ns->queue, bs);
        if (ns->noiob)
                nvme_set_chunk_size(ns);
-       if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
-               nvme_init_integrity(ns);
-       if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
-               set_capacity(disk, 0);
-       else
-               set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
-       if (ctrl->oncs & NVME_CTRL_ONCS_DSM)
-               nvme_config_discard(ns);
-       blk_mq_unfreeze_queue(disk->queue);
+       nvme_update_disk_info(disk, ns, id);
 }
 
 static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1382,7 +1447,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
 static const struct block_device_operations nvme_fops = {
        .owner          = THIS_MODULE,
        .ioctl          = nvme_ioctl,
-       .compat_ioctl   = nvme_compat_ioctl,
+       .compat_ioctl   = nvme_ioctl,
        .open           = nvme_open,
        .release        = nvme_release,
        .getgeo         = nvme_getgeo,
@@ -1763,6 +1828,37 @@ static void nvme_init_subnqn(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
        memset(ctrl->subnqn + off, 0, sizeof(ctrl->subnqn) - off);
 }
 
+static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
+                       size_t size)
+{
+       struct nvme_command c = { };
+
+       c.common.opcode = nvme_admin_get_log_page;
+       c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
+       c.common.cdw10[0] = nvme_get_log_dw10(log_page, size);
+
+       return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
+}
+
+static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
+{
+       int ret;
+
+       if (!ctrl->effects)
+               ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
+
+       if (!ctrl->effects)
+               return 0;
+
+       ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
+                                       sizeof(*ctrl->effects));
+       if (ret) {
+               kfree(ctrl->effects);
+               ctrl->effects = NULL;
+       }
+       return ret;
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -1798,6 +1894,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
                return -EIO;
        }
 
+       if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
+               ret = nvme_get_effects_log(ctrl);
+               if (ret < 0)
+                       return ret;
+       }
+
        nvme_init_subnqn(ctrl, id);
 
        if (!ctrl->identified) {
@@ -1932,33 +2034,12 @@ EXPORT_SYMBOL_GPL(nvme_init_identify);
 
 static int nvme_dev_open(struct inode *inode, struct file *file)
 {
-       struct nvme_ctrl *ctrl;
-       int instance = iminor(inode);
-       int ret = -ENODEV;
-
-       spin_lock(&dev_list_lock);
-       list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
-               if (ctrl->instance != instance)
-                       continue;
-
-               if (!ctrl->admin_q) {
-                       ret = -EWOULDBLOCK;
-                       break;
-               }
-               if (!kref_get_unless_zero(&ctrl->kref))
-                       break;
-               file->private_data = ctrl;
-               ret = 0;
-               break;
-       }
-       spin_unlock(&dev_list_lock);
-
-       return ret;
-}
+       struct nvme_ctrl *ctrl =
+               container_of(inode->i_cdev, struct nvme_ctrl, cdev);
 
-static int nvme_dev_release(struct inode *inode, struct file *file)
-{
-       nvme_put_ctrl(file->private_data);
+       if (ctrl->state != NVME_CTRL_LIVE)
+               return -EWOULDBLOCK;
+       file->private_data = ctrl;
        return 0;
 }
 
@@ -2022,7 +2103,6 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 static const struct file_operations nvme_dev_fops = {
        .owner          = THIS_MODULE,
        .open           = nvme_dev_open,
-       .release        = nvme_dev_release,
        .unlocked_ioctl = nvme_dev_ioctl,
        .compat_ioctl   = nvme_dev_ioctl,
 };
@@ -2188,7 +2268,7 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
        if (device_remove_file_self(dev, attr))
-               ctrl->ops->delete_ctrl(ctrl);
+               nvme_delete_ctrl_sync(ctrl);
        return count;
 }
 static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
@@ -2300,7 +2380,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (ns->ns_id == nsid) {
-                       kref_get(&ns->kref);
+                       if (!kref_get_unless_zero(&ns->kref))
+                               continue;
                        ret = ns;
                        break;
                }
@@ -2403,7 +2484,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        list_add_tail(&ns->list, &ctrl->namespaces);
        mutex_unlock(&ctrl->namespaces_mutex);
 
-       kref_get(&ctrl->kref);
+       nvme_get_ctrl(ctrl);
 
        kfree(id);
 
@@ -2590,7 +2671,7 @@ static void nvme_async_event_work(struct work_struct *work)
                container_of(work, struct nvme_ctrl, async_event_work);
 
        spin_lock_irq(&ctrl->lock);
-       while (ctrl->event_limit > 0) {
+       while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) {
                int aer_idx = --ctrl->event_limit;
 
                spin_unlock_irq(&ctrl->lock);
@@ -2616,18 +2697,13 @@ static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl)
 
 static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
 {
-       struct nvme_command c = { };
        struct nvme_fw_slot_info_log *log;
 
        log = kmalloc(sizeof(*log), GFP_KERNEL);
        if (!log)
                return;
 
-       c.common.opcode = nvme_admin_get_log_page;
-       c.common.nsid = cpu_to_le32(NVME_NSID_ALL);
-       c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log));
-
-       if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log)))
+       if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
                dev_warn(ctrl->device,
                                "Get FW SLOT INFO log error\n");
        kfree(log);
@@ -2661,7 +2737,7 @@ static void nvme_fw_act_work(struct work_struct *work)
                return;
 
        nvme_start_queues(ctrl);
-       /* read FW slot informationi to clear the AER*/
+       /* read FW slot information to clear the AER */
        nvme_get_fw_slot_info(ctrl);
 }
 
@@ -2677,7 +2753,8 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                /*FALLTHRU*/
        case NVME_SC_ABORT_REQ:
                ++ctrl->event_limit;
-               queue_work(nvme_wq, &ctrl->async_event_work);
+               if (ctrl->state == NVME_CTRL_LIVE)
+                       queue_work(nvme_wq, &ctrl->async_event_work);
                break;
        default:
                break;
@@ -2692,7 +2769,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
                nvme_queue_scan(ctrl);
                break;
        case NVME_AER_NOTICE_FW_ACT_STARTING:
-               schedule_work(&ctrl->fw_act_work);
+               queue_work(nvme_wq, &ctrl->fw_act_work);
                break;
        default:
                dev_warn(ctrl->device, "async event result %08x\n", result);
@@ -2707,35 +2784,6 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_queue_async_events);
 
-static DEFINE_IDA(nvme_instance_ida);
-
-static int nvme_set_instance(struct nvme_ctrl *ctrl)
-{
-       int instance, error;
-
-       do {
-               if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
-                       return -ENODEV;
-
-               spin_lock(&dev_list_lock);
-               error = ida_get_new(&nvme_instance_ida, &instance);
-               spin_unlock(&dev_list_lock);
-       } while (error == -EAGAIN);
-
-       if (error)
-               return -ENODEV;
-
-       ctrl->instance = instance;
-       return 0;
-}
-
-static void nvme_release_instance(struct nvme_ctrl *ctrl)
-{
-       spin_lock(&dev_list_lock);
-       ida_remove(&nvme_instance_ida, ctrl->instance);
-       spin_unlock(&dev_list_lock);
-}
-
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
        nvme_stop_keep_alive(ctrl);
@@ -2760,31 +2808,22 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
 
 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
 {
-       device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
-
-       spin_lock(&dev_list_lock);
-       list_del(&ctrl->node);
-       spin_unlock(&dev_list_lock);
+       cdev_device_del(&ctrl->cdev, ctrl->device);
 }
 EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
 
-static void nvme_free_ctrl(struct kref *kref)
+static void nvme_free_ctrl(struct device *dev)
 {
-       struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+       struct nvme_ctrl *ctrl =
+               container_of(dev, struct nvme_ctrl, ctrl_device);
 
-       put_device(ctrl->device);
-       nvme_release_instance(ctrl);
+       ida_simple_remove(&nvme_instance_ida, ctrl->instance);
        ida_destroy(&ctrl->ns_ida);
+       kfree(ctrl->effects);
 
        ctrl->ops->free_ctrl(ctrl);
 }
 
-void nvme_put_ctrl(struct nvme_ctrl *ctrl)
-{
-       kref_put(&ctrl->kref, nvme_free_ctrl);
-}
-EXPORT_SYMBOL_GPL(nvme_put_ctrl);
-
 /*
  * Initialize a NVMe controller structures.  This needs to be called during
  * earliest initialization so that we have the initialized structured around
@@ -2799,32 +2838,38 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        spin_lock_init(&ctrl->lock);
        INIT_LIST_HEAD(&ctrl->namespaces);
        mutex_init(&ctrl->namespaces_mutex);
-       kref_init(&ctrl->kref);
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;
        INIT_WORK(&ctrl->scan_work, nvme_scan_work);
        INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
        INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
+       INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
 
-       ret = nvme_set_instance(ctrl);
-       if (ret)
+       ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
+       if (ret < 0)
                goto out;
-
-       ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
-                               MKDEV(nvme_char_major, ctrl->instance),
-                               ctrl, nvme_dev_attr_groups,
-                               "nvme%d", ctrl->instance);
-       if (IS_ERR(ctrl->device)) {
-               ret = PTR_ERR(ctrl->device);
+       ctrl->instance = ret;
+
+       device_initialize(&ctrl->ctrl_device);
+       ctrl->device = &ctrl->ctrl_device;
+       ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+       ctrl->device->class = nvme_class;
+       ctrl->device->parent = ctrl->dev;
+       ctrl->device->groups = nvme_dev_attr_groups;
+       ctrl->device->release = nvme_free_ctrl;
+       dev_set_drvdata(ctrl->device, ctrl);
+       ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
+       if (ret)
                goto out_release_instance;
-       }
-       get_device(ctrl->device);
-       ida_init(&ctrl->ns_ida);
 
-       spin_lock(&dev_list_lock);
-       list_add_tail(&ctrl->node, &nvme_ctrl_list);
-       spin_unlock(&dev_list_lock);
+       cdev_init(&ctrl->cdev, &nvme_dev_fops);
+       ctrl->cdev.owner = ops->module;
+       ret = cdev_device_add(&ctrl->cdev, ctrl->device);
+       if (ret)
+               goto out_free_name;
+
+       ida_init(&ctrl->ns_ida);
 
        /*
         * Initialize latency tolerance controls.  The sysfs files won't
@@ -2835,8 +2880,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
        return 0;
+out_free_name:
+       kfree_const(dev->kobj.name);
 out_release_instance:
-       nvme_release_instance(ctrl);
+       ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 out:
        return ret;
 }
@@ -2945,6 +2992,16 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
+{
+       if (!ctrl->ops->reinit_request)
+               return 0;
+
+       return blk_mq_tagset_iter(set, set->driver_data,
+                       ctrl->ops->reinit_request);
+}
+EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
+
 int __init nvme_core_init(void)
 {
        int result;
@@ -2954,12 +3011,9 @@ int __init nvme_core_init(void)
        if (!nvme_wq)
                return -ENOMEM;
 
-       result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
-                                                       &nvme_dev_fops);
+       result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
        if (result < 0)
                goto destroy_wq;
-       else if (result > 0)
-               nvme_char_major = result;
 
        nvme_class = class_create(THIS_MODULE, "nvme");
        if (IS_ERR(nvme_class)) {
@@ -2970,7 +3024,7 @@ int __init nvme_core_init(void)
        return 0;
 
 unregister_chrdev:
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
 destroy_wq:
        destroy_workqueue(nvme_wq);
        return result;
@@ -2979,7 +3033,7 @@ int __init nvme_core_init(void)
 void nvme_core_exit(void)
 {
        class_destroy(nvme_class);
-       __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+       unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
        destroy_workqueue(nvme_wq);
 }