asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge tag 'ceph-for-4.19-rc3' of https://github.com/ceph/ceph-client
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Sep 2018 17:57:59 +0000 (10:57 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Sep 2018 17:57:59 +0000 (10:57 -0700)
Pull ceph fixes from Ilya Dryomov:
 "Two rbd patches to complete support for images within namespaces that
  went into -rc1 and a use-after-free fix.

  The rbd changes have been sitting in a branch for quite a while but
  couldn't be included into the -rc1 pull request because of a pending
  wire protocol backwards compatibility fixup that only got committed
  early this week"

* tag 'ceph-for-4.19-rc3' of https://github.com/ceph/ceph-client:
  rbd: support cloning across namespaces
  rbd: factor out get_parent_info()
  ceph: avoid a use-after-free in ceph_destroy_options()

drivers/block/rbd.c
fs/ceph/super.c

index 7915f3b03736eadebf623801802bdb1d7bd42f75..73ed5f3a862dfcde598227671d55c2491d33408b 100644 (file)
@@ -4207,11 +4207,13 @@ static ssize_t rbd_parent_show(struct device *dev,
 
                count += sprintf(&buf[count], "%s"
                            "pool_id %llu\npool_name %s\n"
+                           "pool_ns %s\n"
                            "image_id %s\nimage_name %s\n"
                            "snap_id %llu\nsnap_name %s\n"
                            "overlap %llu\n",
                            !count ? "" : "\n", /* first? */
                            spec->pool_id, spec->pool_name,
+                           spec->pool_ns ?: "",
                            spec->image_id, spec->image_name ?: "(unknown)",
                            spec->snap_id, spec->snap_name,
                            rbd_dev->parent_overlap);
@@ -4584,47 +4586,177 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
                                                &rbd_dev->header.features);
 }
 
+struct parent_image_info {     /* parent image spec plus overlap, as decoded from the OSD replies */
+       u64             pool_id;        /* compared against CEPH_NOPOOL by rbd_dev_v2_parent_info() */
+       const char      *pool_ns;       /* allocated string or NULL; never set by the legacy "get_parent" path */
+       const char      *image_id;      /* allocated string or NULL; kfree()d by the consumer unless handed to a spec */
+       u64             snap_id;
+
+       bool            has_overlap;    /* decoded flag byte from "parent_overlap_get"; forced true on the legacy path */
+       u64             overlap;        /* decoded only when has_overlap is set */
+};
+
+/*
+ * The caller is responsible for @pii: strings stored in it are not freed here, even on error.
+ */
+static int decode_parent_image_spec(void **p, void *end,
+                                   struct parent_image_info *pii)
+{
+       u8 struct_v;
+       u32 struct_len;
+       int ret;
+
+       ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
+                                 &struct_v, &struct_len);
+       if (ret)
+               return ret;
+
+       ceph_decode_64_safe(p, end, pii->pool_id, e_inval);     /* presumably bails out to e_inval on a short buffer */
+       pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+       if (IS_ERR(pii->pool_ns)) {
+               ret = PTR_ERR(pii->pool_ns);
+               pii->pool_ns = NULL;    /* never leave an ERR_PTR in @pii: the caller kfree()s this field */
+               return ret;
+       }
+       pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+       if (IS_ERR(pii->image_id)) {
+               ret = PTR_ERR(pii->image_id);
+               pii->image_id = NULL;   /* same as above; pool_ns stays set for the caller to free */
+               return ret;
+       }
+       ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
+       return 0;
+
+e_inval:
+       return -EINVAL;
+}
+
+static int __get_parent_info(struct rbd_device *rbd_dev,
+                            struct page *req_page,
+                            struct page *reply_page,
+                            struct parent_image_info *pii)
+{      /* Fill @pii from "parent_get" + "parent_overlap_get"; returns 0, 1 (method unsupported) or a negative errno. */
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       size_t reply_len = PAGE_SIZE;
+       void *p, *end;
+       int ret;
+
+       ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+                            "rbd", "parent_get", CEPH_OSD_FLAG_READ,
+                            req_page, sizeof(u64), reply_page, &reply_len);    /* request is the u64 snap id encoded by get_parent_info() */
+       if (ret)
+               return ret == -EOPNOTSUPP ? 1 : ret;    /* positive return makes get_parent_info() try the legacy method */
+
+       p = page_address(reply_page);
+       end = p + reply_len;
+       ret = decode_parent_image_spec(&p, end, pii);
+       if (ret)
+               return ret;     /* @pii may hold allocated strings here; freeing them is left to the caller */
+
+       ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+                            "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
+                            req_page, sizeof(u64), reply_page, &reply_len);    /* NOTE(review): reply_len is not reset to PAGE_SIZE after the first call - confirm that is intended */
+       if (ret)
+               return ret;
+
+       p = page_address(reply_page);
+       end = p + reply_len;
+       ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
+       if (pii->has_overlap)   /* the overlap value is decoded only when the flag byte is set */
+               ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+       return 0;
+
+e_inval:
+       return -EINVAL;
+}
+
+/*
+ * The caller is responsible for @pii.  Uses the single "get_parent" call; @pii->pool_ns is not written here.
+ */
+static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
+                                   struct page *req_page,
+                                   struct page *reply_page,
+                                   struct parent_image_info *pii)
+{
+       struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       size_t reply_len = PAGE_SIZE;
+       void *p, *end;
+       int ret;
+
+       ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+                            "rbd", "get_parent", CEPH_OSD_FLAG_READ,
+                            req_page, sizeof(u64), reply_page, &reply_len);
+       if (ret)
+               return ret;
+
+       p = page_address(reply_page);
+       end = p + reply_len;
+       ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);    /* reply order: pool_id, image_id string, snap_id, overlap */
+       pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+       if (IS_ERR(pii->image_id)) {
+               ret = PTR_ERR(pii->image_id);
+               pii->image_id = NULL;   /* never leave an ERR_PTR in @pii: the caller kfree()s this field */
+               return ret;
+       }
+       ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
+       pii->has_overlap = true;        /* this reply carries overlap unconditionally, with no separate flag byte */
+       ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+       return 0;
+
+e_inval:
+       return -EINVAL;
+}
+
+static int get_parent_info(struct rbd_device *rbd_dev,
+                          struct parent_image_info *pii)
+{      /* Fill @pii via __get_parent_info(), falling back to the legacy method; returns 0 or a negative errno. */
+       struct page *req_page, *reply_page;
+       void *p;
+       int ret;
+
+       req_page = alloc_page(GFP_KERNEL);
+       if (!req_page)
+               return -ENOMEM;
+
+       reply_page = alloc_page(GFP_KERNEL);
+       if (!reply_page) {
+               __free_page(req_page);
+               return -ENOMEM;
+       }
+
+       p = page_address(req_page);
+       ceph_encode_64(&p, rbd_dev->spec->snap_id);     /* one encoded snap id serves as the request for every call below */
+       ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
+       if (ret > 0)    /* __get_parent_info() returns 1 when "parent_get" fails with -EOPNOTSUPP */
+               ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
+                                              pii);
+
+       __free_page(req_page);  /* only the pages are released here; strings in @pii stay with the caller */
+       __free_page(reply_page);
+       return ret;
+}
+
 static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 {
        struct rbd_spec *parent_spec;
-       size_t size;
-       void *reply_buf = NULL;
-       __le64 snapid;
-       void *p;
-       void *end;
-       u64 pool_id;
-       char *image_id;
-       u64 snap_id;
-       u64 overlap;
+       struct parent_image_info pii = { 0 };
        int ret;
 
        parent_spec = rbd_spec_alloc();
        if (!parent_spec)
                return -ENOMEM;
 
-       size = sizeof (__le64) +                                /* pool_id */
-               sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +        /* image_id */
-               sizeof (__le64) +                               /* snap_id */
-               sizeof (__le64);                                /* overlap */
-       reply_buf = kmalloc(size, GFP_KERNEL);
-       if (!reply_buf) {
-               ret = -ENOMEM;
+       ret = get_parent_info(rbd_dev, &pii);
+       if (ret)
                goto out_err;
-       }
 
-       snapid = cpu_to_le64(rbd_dev->spec->snap_id);
-       ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
-                                 &rbd_dev->header_oloc, "get_parent",
-                                 &snapid, sizeof(snapid), reply_buf, size);
-       dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
-       if (ret < 0)
-               goto out_err;
+       dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+            __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+            pii.has_overlap, pii.overlap);
 
-       p = reply_buf;
-       end = reply_buf + ret;
-       ret = -ERANGE;
-       ceph_decode_64_safe(&p, end, pool_id, out_err);
-       if (pool_id == CEPH_NOPOOL) {
+       if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
                /*
                 * Either the parent never existed, or we have
                 * record of it but the image got flattened so it no
@@ -4633,6 +4765,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
                 * overlap to 0.  The effect of this is that all new
                 * requests will be treated as if the image had no
                 * parent.
+                *
+                * If !pii.has_overlap, the parent image spec is not
+                * applicable.  It's there to avoid duplication in each
+                * snapshot record.
                 */
                if (rbd_dev->parent_overlap) {
                        rbd_dev->parent_overlap = 0;
@@ -4647,51 +4783,36 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
        /* The ceph file layout needs to fit pool id in 32 bits */
 
        ret = -EIO;
-       if (pool_id > (u64)U32_MAX) {
+       if (pii.pool_id > (u64)U32_MAX) {
                rbd_warn(NULL, "parent pool id too large (%llu > %u)",
-                       (unsigned long long)pool_id, U32_MAX);
+                       (unsigned long long)pii.pool_id, U32_MAX);
                goto out_err;
        }
 
-       image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
-       if (IS_ERR(image_id)) {
-               ret = PTR_ERR(image_id);
-               goto out_err;
-       }
-       ceph_decode_64_safe(&p, end, snap_id, out_err);
-       ceph_decode_64_safe(&p, end, overlap, out_err);
-
        /*
         * The parent won't change (except when the clone is
         * flattened, already handled that).  So we only need to
         * record the parent spec we have not already done so.
         */
        if (!rbd_dev->parent_spec) {
-               parent_spec->pool_id = pool_id;
-               parent_spec->image_id = image_id;
-               parent_spec->snap_id = snap_id;
-
-               /* TODO: support cloning across namespaces */
-               if (rbd_dev->spec->pool_ns) {
-                       parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
-                                                      GFP_KERNEL);
-                       if (!parent_spec->pool_ns) {
-                               ret = -ENOMEM;
-                               goto out_err;
-                       }
+               parent_spec->pool_id = pii.pool_id;
+               if (pii.pool_ns && *pii.pool_ns) {
+                       parent_spec->pool_ns = pii.pool_ns;
+                       pii.pool_ns = NULL;
                }
+               parent_spec->image_id = pii.image_id;
+               pii.image_id = NULL;
+               parent_spec->snap_id = pii.snap_id;
 
                rbd_dev->parent_spec = parent_spec;
                parent_spec = NULL;     /* rbd_dev now owns this */
-       } else {
-               kfree(image_id);
        }
 
        /*
         * We always update the parent overlap.  If it's zero we issue
         * a warning, as we will proceed as if there was no parent.
         */
-       if (!overlap) {
+       if (!pii.overlap) {
                if (parent_spec) {
                        /* refresh, careful to warn just once */
                        if (rbd_dev->parent_overlap)
@@ -4702,14 +4823,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
                        rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
                }
        }
-       rbd_dev->parent_overlap = overlap;
+       rbd_dev->parent_overlap = pii.overlap;
 
 out:
        ret = 0;
 out_err:
-       kfree(reply_buf);
+       kfree(pii.pool_ns);
+       kfree(pii.image_id);
        rbd_spec_put(parent_spec);
-
        return ret;
 }
 
index 43ca3b763875d43c83ae43e9bd6b4b475493ac46..eab1359d05532afb6960c7acfb5c4fee2891eac9 100644 (file)
@@ -602,6 +602,8 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 
 /*
  * create a new fs client
+ *
+ * Success or not, this function consumes @fsopt and @opt.
  */
 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
                                        struct ceph_options *opt)
@@ -609,17 +611,20 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
        struct ceph_fs_client *fsc;
        int page_count;
        size_t size;
-       int err = -ENOMEM;
+       int err;
 
        fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
-       if (!fsc)
-               return ERR_PTR(-ENOMEM);
+       if (!fsc) {
+               err = -ENOMEM;
+               goto fail;
+       }
 
        fsc->client = ceph_create_client(opt, fsc);
        if (IS_ERR(fsc->client)) {
                err = PTR_ERR(fsc->client);
                goto fail;
        }
+       opt = NULL; /* fsc->client now owns this */
 
        fsc->client->extra_mon_dispatch = extra_mon_dispatch;
        fsc->client->osdc.abort_on_full = true;
@@ -677,6 +682,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
        ceph_destroy_client(fsc->client);
 fail:
        kfree(fsc);
+       if (opt)
+               ceph_destroy_options(opt);
+       destroy_mount_options(fsopt);
        return ERR_PTR(err);
 }
 
@@ -1042,8 +1050,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        fsc = create_fs_client(fsopt, opt);
        if (IS_ERR(fsc)) {
                res = ERR_CAST(fsc);
-               destroy_mount_options(fsopt);
-               ceph_destroy_options(opt);
                goto out_final;
        }