#include "internal.h"
-#define IORING_MAX_ENTRIES 4096
+#define IORING_MAX_ENTRIES 32768
#define IORING_MAX_FIXED_FILES 1024
struct io_uring {
struct list_head list;
struct file *file;
- off_t io_end;
+ off_t io_start;
size_t io_len;
};
} ____cacheline_aligned_in_smp;
/* IO offload */
- struct workqueue_struct *sqo_wq;
+ struct workqueue_struct *sqo_wq[2];
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
static inline void io_queue_async_work(struct io_ring_ctx *ctx,
struct io_kiocb *req)
{
- queue_work(ctx->sqo_wq, &req->work);
+ int rw;
+
+ switch (req->submit.sqe->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ rw = !(req->rw.ki_flags & IOCB_DIRECT);
+ break;
+ default:
+ rw = 0;
+ break;
+ }
+
+ queue_work(ctx->sqo_wq[rw], &req->work);
}
static void io_commit_cqring(struct io_ring_ctx *ctx)
return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
}
+static inline bool io_should_merge(struct async_list *al, struct kiocb *kiocb)
+{
+ if (al->file == kiocb->ki_filp) {
+ off_t start, end;
+
+ /*
+ * Allow merging if we're anywhere in the range of the same
+ * page. Generally this happens for sub-page reads or writes,
+ * and it's beneficial to allow the first worker to bring the
+ * page in and the piggy backed work can then work on the
+ * cached page.
+ */
+ start = al->io_start & PAGE_MASK;
+ end = (al->io_start + al->io_len + PAGE_SIZE - 1) & PAGE_MASK;
+ if (kiocb->ki_pos >= start && kiocb->ki_pos <= end)
+ return true;
+ }
+
+ al->file = NULL;
+ return false;
+}
+
/*
* Make a note of the last file/offset/direction we punted to async
* context. We'll use this information to see if we can piggy back a
struct async_list *async_list = &req->ctx->pending_async[rw];
struct kiocb *kiocb = &req->rw;
struct file *filp = kiocb->ki_filp;
- off_t io_end = kiocb->ki_pos + len;
- if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
+ if (io_should_merge(async_list, kiocb)) {
unsigned long max_bytes;
/* Use 8x RA size as a decent limiter for both reads/writes */
req->flags |= REQ_F_SEQ_PREV;
async_list->io_len += len;
} else {
- io_end = 0;
- async_list->io_len = 0;
+ async_list->file = NULL;
}
}
/* New file? Reset state. */
if (async_list->file != filp) {
- async_list->io_len = 0;
+ async_list->io_start = kiocb->ki_pos;
+ async_list->io_len = len;
async_list->file = filp;
}
- async_list->io_end = io_end;
}
static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
*/
static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
{
- bool ret = false;
+ bool ret;
if (!list)
return false;
static void io_finish_async(struct io_ring_ctx *ctx)
{
+ int i;
+
io_sq_thread_stop(ctx);
- if (ctx->sqo_wq) {
- destroy_workqueue(ctx->sqo_wq);
- ctx->sqo_wq = NULL;
+ for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
+ if (ctx->sqo_wq[i]) {
+ destroy_workqueue(ctx->sqo_wq[i]);
+ ctx->sqo_wq[i] = NULL;
+ }
}
}
}
/* Do QD, or 2 * CPUS, whatever is smallest */
- ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+ ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
+ WQ_UNBOUND | WQ_FREEZABLE,
min(ctx->sq_entries - 1, 2 * num_online_cpus()));
- if (!ctx->sqo_wq) {
+ if (!ctx->sqo_wq[0]) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * This is for buffered writes, where we want to limit the parallelism
+ * due to file locking in file systems. As "normal" buffered writes
+ * should parellelize on writeout quite nicely, limit us to having 2
+ * pending. This avoids massive contention on the inode when doing
+ * buffered async writes.
+ */
+ ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
+ WQ_UNBOUND | WQ_FREEZABLE, 2);
+ if (!ctx->sqo_wq[1]) {
ret = -ENOMEM;
goto err;
}
return 0;
err:
- io_sq_thread_stop(ctx);
+ io_finish_async(ctx);
mmdrop(ctx->sqo_mm);
ctx->sqo_mm = NULL;
return ret;
* Just return the requested submit count, and wake the thread if
* we were asked to.
*/
+ ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sqo_wait);
submitted = to_submit;
- goto out_ctx;
- }
-
- ret = 0;
- if (to_submit) {
+ } else if (to_submit) {
bool block_for_last = false;
to_submit = min(to_submit, ctx->sq_entries);
}
}
-out_ctx:
io_ring_drop_ctx_refs(ctx, 1);
out_fput:
fdput(f);