#include <linux/namei.h>
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
+#include <linux/eventpoll.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
u32 advice;
};
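+/* IORING_OP_EPOLL_CTL request state, mirroring the epoll_ctl(2) arguments */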
+struct io_epoll {
+ struct file *file;
+ int epfd;
+ int op;
+ int fd;
+ struct epoll_event event;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
struct io_files_update files_update;
struct io_fadvise fadvise;
struct io_madvise madvise;
+ struct io_epoll epoll;
};
struct io_async_ctx *io;
unsigned unbound_nonreg_file : 1;
/* opcode is not supported by this kernel */
unsigned not_supported : 1;
+ /* needs the task's file table grabbed before any async punt */
+ unsigned file_table : 1;
};
static const struct io_op_def io_op_defs[] = {
.needs_mm = 1,
.needs_file = 1,
.unbound_nonreg_file = 1,
+ .file_table = 1,
},
[IORING_OP_ASYNC_CANCEL] = {},
[IORING_OP_LINK_TIMEOUT] = {
[IORING_OP_OPENAT] = {
.needs_file = 1,
.fd_non_neg = 1,
+ .file_table = 1,
},
[IORING_OP_CLOSE] = {
.needs_file = 1,
+ .file_table = 1,
},
[IORING_OP_FILES_UPDATE] = {
.needs_mm = 1,
+ .file_table = 1,
},
[IORING_OP_STATX] = {
.needs_mm = 1,
[IORING_OP_OPENAT2] = {
.needs_file = 1,
.fd_non_neg = 1,
+ .file_table = 1,
+ },
+ [IORING_OP_EPOLL_CTL] = {
+ .unbound_nonreg_file = 1,
+ .file_table = 1,
},
};
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_files_update *ip,
unsigned nr_args);
+static int io_grab_files(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
struct file *file;
int ret;
- if (force_nonblock) {
- req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ if (force_nonblock)
return -EAGAIN;
- }
ret = build_open_flags(&req->open.how, &op);
if (ret)
return io_openat2(req, nxt, force_nonblock);
}
+static int io_epoll_ctl_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+#if defined(CONFIG_EPOLL)
+ if (sqe->ioprio || sqe->buf_index)
+ return -EINVAL;
+
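+ /* the epoll_ctl(2) arguments are packed into the regular SQE fields */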
+ req->epoll.epfd = READ_ONCE(sqe->fd);
+ req->epoll.op = READ_ONCE(sqe->len);
+ req->epoll.fd = READ_ONCE(sqe->off);
+
+ if (ep_op_has_event(req->epoll.op)) {
+ struct epoll_event __user *ev;
+
+ ev = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ if (copy_from_user(&req->epoll.event, ev, sizeof(*ev)))
+ return -EFAULT;
+ }
+
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int io_epoll_ctl(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+#if defined(CONFIG_EPOLL)
+ struct io_epoll *ie = &req->epoll;
+ int ret;
+
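+ /* with force_nonblock set, do_epoll_ctl() returns -EAGAIN instead of sleeping */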
+ ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock);
+ if (force_nonblock && ret == -EAGAIN)
+ return -EAGAIN;
+
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
return ret;
/* if the file has a flush method, be safe and punt to async */
- if (req->close.put_file->f_op->flush && !io_wq_current_is_worker()) {
- req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ if (req->close.put_file->f_op->flush && !io_wq_current_is_worker())
goto eagain;
- }
/*
* No ->flush(), safely close from here and just punt the
ret = __io_accept(req, nxt, force_nonblock);
if (ret == -EAGAIN && force_nonblock) {
req->work.func = io_accept_finish;
- req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
io_put_req(req);
return -EAGAIN;
}
struct io_uring_files_update up;
int ret;
- if (force_nonblock) {
- req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ if (force_nonblock)
return -EAGAIN;
- }
up.offset = req->files_update.offset;
up.fds = req->files_update.arg;
{
ssize_t ret = 0;
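+ /* ops marked file_table need current->files grabbed before going async */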
+ if (io_op_defs[req->opcode].file_table) {
+ ret = io_grab_files(req);
+ if (unlikely(ret))
+ return ret;
+ }
+
io_req_work_grab_env(req, &io_op_defs[req->opcode]);
switch (req->opcode) {
case IORING_OP_OPENAT2:
ret = io_openat2_prep(req, sqe);
break;
+ case IORING_OP_EPOLL_CTL:
+ ret = io_epoll_ctl_prep(req, sqe);
+ break;
default:
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
req->opcode);
}
ret = io_openat2(req, nxt, force_nonblock);
break;
+ case IORING_OP_EPOLL_CTL:
+ if (sqe) {
+ ret = io_epoll_ctl_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_epoll_ctl(req, nxt, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
int ret = -EBADF;
struct io_ring_ctx *ctx = req->ctx;
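+ /* nothing to do if the file table was already grabbed for this request */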
+ if (req->work.files)
+ return 0;
if (!ctx->ring_file)
return -EBADF;
if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
(req->flags & REQ_F_MUST_PUNT))) {
punt:
- if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
+ if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (ret)
goto err;
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
{
+ const struct cred *old_creds = NULL;
struct io_ring_ctx *ctx = req->ctx;
unsigned int sqe_flags;
- int ret;
+ int ret, id;
sqe_flags = READ_ONCE(sqe->flags);
ret = -EINVAL;
goto err_req;
}
+
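+ /* a nonzero personality selects credentials registered via IORING_REGISTER_PERSONALITY */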
+ id = READ_ONCE(sqe->personality);
+ if (id) {
+ const struct cred *personality_creds;
+
+ personality_creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!personality_creds)) {
+ ret = -EINVAL;
+ goto err_req;
+ }
+ old_creds = override_creds(personality_creds);
+ }
+
/* same numerical values with corresponding REQ_F_*, safe to copy */
req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
IOSQE_ASYNC);
err_req:
io_cqring_add_event(req, ret);
io_double_put_req(req);
+ if (old_creds)
+ revert_creds(old_creds);
return false;
}
}
}
+ if (old_creds)
+ revert_creds(old_creds);
return true;
}
struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
for (j = 0; j < imu->nr_bvecs; j++)
- put_user_page(imu->bvec[j].bv_page);
+ unpin_user_page(imu->bvec[j].bv_page);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, imu->nr_bvecs);
ret = 0;
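+ /* registered buffers hold their pages long-term, so pin them rather than just take a reference */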
down_read(&current->mm->mmap_sem);
- pret = get_user_pages(ubuf, nr_pages,
+ pret = pin_user_pages(ubuf, nr_pages,
FOLL_WRITE | FOLL_LONGTERM,
pages, vmas);
if (pret == nr_pages) {
* release any pages we did get
*/
if (pret > 0)
- put_user_pages(pages, pret);
+ unpin_user_pages(pages, pret);
if (ctx->account_mem)
io_unaccount_mem(ctx->user, nr_pages);
kvfree(imu->bvec);
struct io_ring_ctx *ctx = file->private_data;
io_uring_cancel_files(ctx, data);
- if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
- io_cqring_overflow_flush(ctx, true);
- io_wq_cancel_all(ctx->io_wq);
- }
return 0;
}
} else if (to_submit) {
struct mm_struct *cur_mm;
- if (current->mm != ctx->sqo_mm ||
- current_cred() != ctx->creds) {
- ret = -EPERM;
- goto out;
- }
-
mutex_lock(&ctx->uring_lock);
/* already have mm, so io_submit_sqes() won't try to grab it */
cur_mm = ctx->sqo_mm;