// SPDX-License-Identifier: GPL-2.0
/*
 * virtio-fs: Virtio Filesystem
 * Copyright (C) 2018 Red Hat, Inc.
 */

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_fs.h>
#include <linux/delay.h>
#include <linux/fs_context.h>
#include <linux/highmem.h>
#include "fuse_i.h"

/* List of virtio-fs device instances and a lock for the list. The lock also
 * provides mutual exclusion between device removal and the mount path.
 */
static DEFINE_MUTEX(virtio_fs_mutex);
static LIST_HEAD(virtio_fs_instances);

enum {
        VQ_HIPRIO,
        VQ_REQUEST
};

/* Per-virtqueue state */
struct virtio_fs_vq {
        spinlock_t lock;
        struct virtqueue *vq;     /* protected by ->lock */
        struct work_struct done_work;
        struct list_head queued_reqs;
        struct list_head end_reqs;      /* End these requests */
        struct delayed_work dispatch_work;
        struct fuse_dev *fud;
        bool connected;
        long in_flight;
        char name[24];
} ____cacheline_aligned_in_smp;

/* A virtio-fs device instance */
struct virtio_fs {
        struct kref refcount;
        struct list_head list;    /* on virtio_fs_instances */
        char *tag;
        struct virtio_fs_vq *vqs;
        unsigned int nvqs;               /* number of virtqueues */
        unsigned int num_request_queues; /* number of request queues */
};

struct virtio_fs_forget {
        struct fuse_in_header ih;
        struct fuse_forget_in arg;
        /* This request can be temporarily queued on the virtqueue */
        struct list_head list;
};

static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
                                 struct fuse_req *req, bool in_flight);

static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
        struct virtio_fs *fs = vq->vdev->priv;

        return &fs->vqs[vq->index];
}

static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
{
        return &vq_to_fsvq(vq)->fud->pq;
}

/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
        fsvq->in_flight++;
}

/* Should be called with fsvq->lock held. */
static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
{
        WARN_ON(fsvq->in_flight <= 0);
        fsvq->in_flight--;
}

static void release_virtio_fs_obj(struct kref *ref)
{
        struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);

        kfree(vfs->vqs);
        kfree(vfs);
}

/* Make sure virtio_fs_mutex is held */
static void virtio_fs_put(struct virtio_fs *fs)
{
        kref_put(&fs->refcount, release_virtio_fs_obj);
}

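/* fuse_iqueue_ops release callback: drops the virtio_fs reference taken at
 * mount time.
 */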
static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
{
        struct virtio_fs *vfs = fiq->priv;

        mutex_lock(&virtio_fs_mutex);
        virtio_fs_put(vfs);
        mutex_unlock(&virtio_fs_mutex);
}

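/* Wait for all in-flight requests on this queue to complete, then flush the
 * completion and dispatch work items.
 */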
static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
{
        WARN_ON(fsvq->in_flight < 0);

        /* Wait for in-flight requests to finish. */
        while (1) {
                spin_lock(&fsvq->lock);
                if (!fsvq->in_flight) {
                        spin_unlock(&fsvq->lock);
                        break;
                }
                spin_unlock(&fsvq->lock);
                /* TODO use completion instead of timeout */
                usleep_range(1000, 2000);
        }

        flush_work(&fsvq->done_work);
        flush_delayed_work(&fsvq->dispatch_work);
}

static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                virtio_fs_drain_queue(fsvq);
        }
}

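/* Mark all queues connected so that new requests can be enqueued */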
static void virtio_fs_start_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                spin_lock(&fsvq->lock);
                fsvq->connected = true;
                spin_unlock(&fsvq->lock);
        }
}

/* Add a new instance to the list or return -EEXIST if tag name exists */
static int virtio_fs_add_instance(struct virtio_fs *fs)
{
        struct virtio_fs *fs2;
        bool duplicate = false;

        mutex_lock(&virtio_fs_mutex);

        list_for_each_entry(fs2, &virtio_fs_instances, list) {
                if (strcmp(fs->tag, fs2->tag) == 0)
                        duplicate = true;
        }

        if (!duplicate)
                list_add_tail(&fs->list, &virtio_fs_instances);

        mutex_unlock(&virtio_fs_mutex);

        if (duplicate)
                return -EEXIST;
        return 0;
}

/* Return the virtio_fs with a given tag, or NULL */
static struct virtio_fs *virtio_fs_find_instance(const char *tag)
{
        struct virtio_fs *fs;

        mutex_lock(&virtio_fs_mutex);

        list_for_each_entry(fs, &virtio_fs_instances, list) {
                if (strcmp(fs->tag, tag) == 0) {
                        kref_get(&fs->refcount);
                        goto found;
                }
        }

        fs = NULL; /* not found */

found:
        mutex_unlock(&virtio_fs_mutex);

        return fs;
}

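/* Free the fuse_dev instances attached to each virtqueue */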
static void virtio_fs_free_devs(struct virtio_fs *fs)
{
        unsigned int i;

        for (i = 0; i < fs->nvqs; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                if (!fsvq->fud)
                        continue;

                fuse_dev_free(fsvq->fud);
                fsvq->fud = NULL;
        }
}

/* Read filesystem name from virtio config into fs->tag (device-managed
 * allocation, freed automatically when the device goes away).
 */
static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
{
        char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
        char *end;
        size_t len;

        virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
                           &tag_buf, sizeof(tag_buf));
        end = memchr(tag_buf, '\0', sizeof(tag_buf));
        if (end == tag_buf)
                return -EINVAL; /* empty tag */
        if (!end)
                end = &tag_buf[sizeof(tag_buf)];

        len = end - tag_buf;
        fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
        if (!fs->tag)
                return -ENOMEM;
        memcpy(fs->tag, tag_buf, len);
        fs->tag[len] = '\0';
        return 0;
}

/* Work function for hiprio completion */
static void virtio_fs_hiprio_done_work(struct work_struct *work)
{
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 done_work);
        struct virtqueue *vq = fsvq->vq;

        /* Free completed FUSE_FORGET requests */
        spin_lock(&fsvq->lock);
        do {
                unsigned int len;
                void *req;

                virtqueue_disable_cb(vq);

                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
                        kfree(req);
                        dec_in_flight_req(fsvq);
                }
        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
        spin_unlock(&fsvq->lock);
}

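/* Work function: end requests that previously failed to be enqueued, then
 * retry requests that were queued while the virtqueue was full.
 */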
static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
        struct fuse_req *req;
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 dispatch_work.work);
        struct fuse_conn *fc = fsvq->fud->fc;
        int ret;

        pr_debug("virtio-fs: worker %s called.\n", __func__);
        while (1) {
                spin_lock(&fsvq->lock);
                req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
                                               list);
                if (!req) {
                        spin_unlock(&fsvq->lock);
                        break;
                }

                list_del_init(&req->list);
                spin_unlock(&fsvq->lock);
                fuse_request_end(fc, req);
        }

        /* Dispatch pending requests */
        while (1) {
                spin_lock(&fsvq->lock);
                req = list_first_entry_or_null(&fsvq->queued_reqs,
                                               struct fuse_req, list);
                if (!req) {
                        spin_unlock(&fsvq->lock);
                        return;
                }
                list_del_init(&req->list);
                spin_unlock(&fsvq->lock);

                ret = virtio_fs_enqueue_req(fsvq, req, true);
                if (ret < 0) {
                        if (ret == -ENOMEM || ret == -ENOSPC) {
                                spin_lock(&fsvq->lock);
                                list_add_tail(&req->list, &fsvq->queued_reqs);
                                schedule_delayed_work(&fsvq->dispatch_work,
                                                      msecs_to_jiffies(1));
                                spin_unlock(&fsvq->lock);
                                return;
                        }
                        req->out.h.error = ret;
                        spin_lock(&fsvq->lock);
                        dec_in_flight_req(fsvq);
                        spin_unlock(&fsvq->lock);
                        pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
                               ret);
                        fuse_request_end(fc, req);
                }
        }
}

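/* Work function: retry FORGET requests that were queued while the hiprio
 * virtqueue was full.
 */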
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
{
        struct virtio_fs_forget *forget;
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 dispatch_work.work);
        struct virtqueue *vq = fsvq->vq;
        struct scatterlist sg;
        struct scatterlist *sgs[] = {&sg};
        bool notify;
        int ret;

        pr_debug("virtio-fs: worker %s called.\n", __func__);
        while (1) {
                spin_lock(&fsvq->lock);
                forget = list_first_entry_or_null(&fsvq->queued_reqs,
                                        struct virtio_fs_forget, list);
                if (!forget) {
                        spin_unlock(&fsvq->lock);
                        return;
                }

                list_del(&forget->list);
                if (!fsvq->connected) {
                        dec_in_flight_req(fsvq);
                        spin_unlock(&fsvq->lock);
                        kfree(forget);
                        continue;
                }

                sg_init_one(&sg, forget, sizeof(*forget));

                /* Enqueue the request */
                dev_dbg(&vq->vdev->dev, "%s\n", __func__);
                ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
                if (ret < 0) {
                        if (ret == -ENOMEM || ret == -ENOSPC) {
                                pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
                                         ret);
                                list_add_tail(&forget->list,
                                                &fsvq->queued_reqs);
                                schedule_delayed_work(&fsvq->dispatch_work,
                                                msecs_to_jiffies(1));
                        } else {
                                pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
                                         ret);
                                dec_in_flight_req(fsvq);
                                kfree(forget);
                        }
                        spin_unlock(&fsvq->lock);
                        return;
                }

                notify = virtqueue_kick_prepare(vq);
                spin_unlock(&fsvq->lock);

                if (notify)
                        virtqueue_notify(vq);
                pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
                         __func__);
        }
}

/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
{
        struct fuse_args *args = req->args;
        unsigned int offset = 0;
        unsigned int num_in;
        unsigned int num_out;
        unsigned int len;
        unsigned int i;

        num_in = args->in_numargs - args->in_pages;
        num_out = args->out_numargs - args->out_pages;
        len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
              fuse_len_args(num_out, args->out_args);

        req->argbuf = kmalloc(len, GFP_ATOMIC);
        if (!req->argbuf)
                return -ENOMEM;

        for (i = 0; i < num_in; i++) {
                memcpy(req->argbuf + offset,
                       args->in_args[i].value,
                       args->in_args[i].size);
                offset += args->in_args[i].size;
        }

        return 0;
}

/* Copy args out of and free req->argbuf */
static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
{
        unsigned int remaining;
        unsigned int offset;
        unsigned int num_in;
        unsigned int num_out;
        unsigned int i;

        remaining = req->out.h.len - sizeof(req->out.h);
        num_in = args->in_numargs - args->in_pages;
        num_out = args->out_numargs - args->out_pages;
        offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);

        for (i = 0; i < num_out; i++) {
                unsigned int argsize = args->out_args[i].size;

                if (args->out_argvar &&
                    i == args->out_numargs - 1 &&
                    argsize > remaining) {
                        argsize = remaining;
                }

                memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
                offset += argsize;

                if (i != args->out_numargs - 1)
                        remaining -= argsize;
        }

        /* Store the actual size of the variable-length arg */
        if (args->out_argvar)
                args->out_args[args->out_numargs - 1].size = remaining;

        kfree(req->argbuf);
        req->argbuf = NULL;
}

/* Work function for request completion */
static void virtio_fs_requests_done_work(struct work_struct *work)
{
        struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
                                                 done_work);
        struct fuse_pqueue *fpq = &fsvq->fud->pq;
        struct fuse_conn *fc = fsvq->fud->fc;
        struct virtqueue *vq = fsvq->vq;
        struct fuse_req *req;
        struct fuse_args_pages *ap;
        struct fuse_req *next;
        struct fuse_args *args;
        unsigned int len, i, thislen;
        struct page *page;
        LIST_HEAD(reqs);

        /* Collect completed requests off the virtqueue */
        spin_lock(&fsvq->lock);
        do {
                virtqueue_disable_cb(vq);

                while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
                        spin_lock(&fpq->lock);
                        list_move_tail(&req->list, &reqs);
                        spin_unlock(&fpq->lock);
                }
        } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
        spin_unlock(&fsvq->lock);

        /* End requests */
        list_for_each_entry_safe(req, next, &reqs, list) {
                /*
                 * TODO verify that server properly follows FUSE protocol
                 * (oh.uniq, oh.len)
                 */
                args = req->args;
                copy_args_from_argbuf(args, req);

                if (args->out_pages && args->page_zeroing) {
                        len = args->out_args[args->out_numargs - 1].size;
                        ap = container_of(args, typeof(*ap), args);
                        for (i = 0; i < ap->num_pages; i++) {
                                thislen = ap->descs[i].length;
                                if (len < thislen) {
                                        WARN_ON(ap->descs[i].offset);
                                        page = ap->pages[i];
                                        zero_user_segment(page, len, thislen);
                                        len = 0;
                                } else {
                                        len -= thislen;
                                }
                        }
                }

                spin_lock(&fpq->lock);
                clear_bit(FR_SENT, &req->flags);
                list_del_init(&req->list);
                spin_unlock(&fpq->lock);

                fuse_request_end(fc, req);
                spin_lock(&fsvq->lock);
                dec_in_flight_req(fsvq);
                spin_unlock(&fsvq->lock);
        }
}

/* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq)
{
        struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);

        dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);

        schedule_work(&fsvq->done_work);
}

/* Initialize virtqueues */
static int virtio_fs_setup_vqs(struct virtio_device *vdev,
                               struct virtio_fs *fs)
{
        struct virtqueue **vqs;
        vq_callback_t **callbacks;
        const char **names;
        unsigned int i;
        int ret = 0;

        virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
                     &fs->num_request_queues);
        if (fs->num_request_queues == 0)
                return -EINVAL;

        fs->nvqs = 1 + fs->num_request_queues;
        fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
        if (!fs->vqs)
                return -ENOMEM;

        vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
        callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
                                        GFP_KERNEL);
        names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
        if (!vqs || !callbacks || !names) {
                ret = -ENOMEM;
                goto out;
        }

        callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
        snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
                        "hiprio");
        names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
        INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
        INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
        INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
        INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
                        virtio_fs_hiprio_dispatch_work);
        spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);

        /* Initialize the requests virtqueues */
        for (i = VQ_REQUEST; i < fs->nvqs; i++) {
                spin_lock_init(&fs->vqs[i].lock);
                INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
                INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
                                  virtio_fs_request_dispatch_work);
                INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
                INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
                snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
                         "requests.%u", i - VQ_REQUEST);
                callbacks[i] = virtio_fs_vq_done;
                names[i] = fs->vqs[i].name;
        }

        ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
        if (ret < 0)
                goto out;

        for (i = 0; i < fs->nvqs; i++)
                fs->vqs[i].vq = vqs[i];

        virtio_fs_start_all_queues(fs);
out:
        kfree(names);
        kfree(callbacks);
        kfree(vqs);
        if (ret)
                kfree(fs->vqs);
        return ret;
}

/* Free virtqueues (device must already be reset) */
static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
                                  struct virtio_fs *fs)
{
        vdev->config->del_vqs(vdev);
}

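/* Device probe: read the tag, set up virtqueues, and make the instance
 * available to mounts.
 */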
static int virtio_fs_probe(struct virtio_device *vdev)
{
        struct virtio_fs *fs;
        int ret;

        fs = kzalloc(sizeof(*fs), GFP_KERNEL);
        if (!fs)
                return -ENOMEM;
        kref_init(&fs->refcount);
        vdev->priv = fs;

        ret = virtio_fs_read_tag(vdev, fs);
        if (ret < 0)
                goto out;

        ret = virtio_fs_setup_vqs(vdev, fs);
        if (ret < 0)
                goto out;

        /* TODO vq affinity */

        /* Bring the device online in case the filesystem is mounted and
         * requests need to be sent before we return.
         */
        virtio_device_ready(vdev);

        ret = virtio_fs_add_instance(fs);
        if (ret < 0)
                goto out_vqs;

        return 0;

out_vqs:
        vdev->config->reset(vdev);
        virtio_fs_cleanup_vqs(vdev, fs);

out:
        vdev->priv = NULL;
        kfree(fs);
        return ret;
}

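/* Mark all queues disconnected; subsequent enqueue attempts fail with
 * -ENOTCONN.
 */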
static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
{
        struct virtio_fs_vq *fsvq;
        int i;

        for (i = 0; i < fs->nvqs; i++) {
                fsvq = &fs->vqs[i];
                spin_lock(&fsvq->lock);
                fsvq->connected = false;
                spin_unlock(&fsvq->lock);
        }
}

static void virtio_fs_remove(struct virtio_device *vdev)
{
        struct virtio_fs *fs = vdev->priv;

        mutex_lock(&virtio_fs_mutex);
        /* This device is going away. No one should get a new reference. */
        list_del_init(&fs->list);
        virtio_fs_stop_all_queues(fs);
        virtio_fs_drain_all_queues(fs);
        vdev->config->reset(vdev);
        virtio_fs_cleanup_vqs(vdev, fs);

        vdev->priv = NULL;
        /* Put device reference on virtio_fs object */
        virtio_fs_put(fs);
        mutex_unlock(&virtio_fs_mutex);
}

#ifdef CONFIG_PM_SLEEP
static int virtio_fs_freeze(struct virtio_device *vdev)
{
        /* TODO need to save state here */
        pr_warn("virtio-fs: suspend/resume not yet supported\n");
        return -EOPNOTSUPP;
}

static int virtio_fs_restore(struct virtio_device *vdev)
{
        /* TODO need to restore state here */
        return 0;
}
#endif /* CONFIG_PM_SLEEP */

static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
        {},
};

static const unsigned int feature_table[] = {};

static struct virtio_driver virtio_fs_driver = {
        .driver.name            = KBUILD_MODNAME,
        .driver.owner           = THIS_MODULE,
        .id_table               = id_table,
        .feature_table          = feature_table,
        .feature_table_size     = ARRAY_SIZE(feature_table),
        .probe                  = virtio_fs_probe,
        .remove                 = virtio_fs_remove,
#ifdef CONFIG_PM_SLEEP
        .freeze                 = virtio_fs_freeze,
        .restore                = virtio_fs_restore,
#endif
};

static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        struct fuse_forget_link *link;
        struct virtio_fs_forget *forget;
        struct scatterlist sg;
        struct scatterlist *sgs[] = {&sg};
        struct virtio_fs *fs;
        struct virtqueue *vq;
        struct virtio_fs_vq *fsvq;
        bool notify;
        u64 unique;
        int ret;

        link = fuse_dequeue_forget(fiq, 1, NULL);
        unique = fuse_get_unique(fiq);

        fs = fiq->priv;
        fsvq = &fs->vqs[VQ_HIPRIO];
        spin_unlock(&fiq->lock);

        /* Allocate a buffer for the request */
        forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);

        forget->ih = (struct fuse_in_header){
                .opcode = FUSE_FORGET,
                .nodeid = link->forget_one.nodeid,
                .unique = unique,
                .len = sizeof(*forget),
        };
        forget->arg = (struct fuse_forget_in){
                .nlookup = link->forget_one.nlookup,
        };

        sg_init_one(&sg, forget, sizeof(*forget));

        /* Enqueue the request */
        spin_lock(&fsvq->lock);

        if (!fsvq->connected) {
                kfree(forget);
                spin_unlock(&fsvq->lock);
                goto out;
        }

        vq = fsvq->vq;
        dev_dbg(&vq->vdev->dev, "%s\n", __func__);

        ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOSPC) {
                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
                                 ret);
                        list_add_tail(&forget->list, &fsvq->queued_reqs);
                        schedule_delayed_work(&fsvq->dispatch_work,
                                        msecs_to_jiffies(1));
                        inc_in_flight_req(fsvq);
                } else {
                        pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
                                 ret);
                        kfree(forget);
                }
                spin_unlock(&fsvq->lock);
                goto out;
        }

        inc_in_flight_req(fsvq);
        notify = virtqueue_kick_prepare(vq);

        spin_unlock(&fsvq->lock);

        if (notify)
                virtqueue_notify(vq);
out:
        kfree(link);
}

static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        /*
         * TODO interrupts.
         *
         * Normal fs operations on a local filesystem aren't interruptible.
         * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
         * with a shared lock between host and guest.
         */
        spin_unlock(&fiq->lock);
}

/* Return the number of scatter-gather list elements required */
static unsigned int sg_count_fuse_req(struct fuse_req *req)
{
        struct fuse_args *args = req->args;
        struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
        unsigned int total_sgs = 1 /* fuse_in_header */;

        if (args->in_numargs - args->in_pages)
                total_sgs += 1;

        if (args->in_pages)
                total_sgs += ap->num_pages;

        if (!test_bit(FR_ISREPLY, &req->flags))
                return total_sgs;

        total_sgs += 1 /* fuse_out_header */;

        if (args->out_numargs - args->out_pages)
                total_sgs += 1;

        if (args->out_pages)
                total_sgs += ap->num_pages;

        return total_sgs;
}

/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
                                       struct page **pages,
                                       struct fuse_page_desc *page_descs,
                                       unsigned int num_pages,
                                       unsigned int total_len)
{
        unsigned int i;
        unsigned int this_len;

        for (i = 0; i < num_pages && total_len; i++) {
                sg_init_table(&sg[i], 1);
                this_len = min(page_descs[i].length, total_len);
                sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
                total_len -= this_len;
        }

        return i;
}

/* Add args to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_args(struct scatterlist *sg,
                                      struct fuse_req *req,
                                      struct fuse_arg *args,
                                      unsigned int numargs,
                                      bool argpages,
                                      void *argbuf,
                                      unsigned int *len_used)
{
        struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
        unsigned int total_sgs = 0;
        unsigned int len;

        len = fuse_len_args(numargs - argpages, args);
        if (len)
                sg_init_one(&sg[total_sgs++], argbuf, len);

        if (argpages)
                total_sgs += sg_init_fuse_pages(&sg[total_sgs],
                                                ap->pages, ap->descs,
                                                ap->num_pages,
                                                args[numargs - 1].size);

        if (len_used)
                *len_used = len;

        return total_sgs;
}

/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
                                 struct fuse_req *req, bool in_flight)
{
        /* requests need at least 4 elements */
        struct scatterlist *stack_sgs[6];
        struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
        struct scatterlist **sgs = stack_sgs;
        struct scatterlist *sg = stack_sg;
        struct virtqueue *vq;
        struct fuse_args *args = req->args;
        unsigned int argbuf_used = 0;
        unsigned int out_sgs = 0;
        unsigned int in_sgs = 0;
        unsigned int total_sgs;
        unsigned int i;
        int ret;
        bool notify;
        struct fuse_pqueue *fpq;

        /* Does the sglist fit on the stack? */
        total_sgs = sg_count_fuse_req(req);
        if (total_sgs > ARRAY_SIZE(stack_sgs)) {
                sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
                sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
                if (!sgs || !sg) {
                        ret = -ENOMEM;
                        goto out;
                }
        }

        /* Use a bounce buffer since stack args cannot be mapped */
        ret = copy_args_to_argbuf(req);
        if (ret < 0)
                goto out;

        /* Request elements */
        sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
        out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
                                     (struct fuse_arg *)args->in_args,
                                     args->in_numargs, args->in_pages,
                                     req->argbuf, &argbuf_used);

        /* Reply elements */
        if (test_bit(FR_ISREPLY, &req->flags)) {
                sg_init_one(&sg[out_sgs + in_sgs++],
                            &req->out.h, sizeof(req->out.h));
                in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
                                            args->out_args, args->out_numargs,
                                            args->out_pages,
                                            req->argbuf + argbuf_used, NULL);
        }

        WARN_ON(out_sgs + in_sgs != total_sgs);

        for (i = 0; i < total_sgs; i++)
                sgs[i] = &sg[i];

        spin_lock(&fsvq->lock);

        if (!fsvq->connected) {
                spin_unlock(&fsvq->lock);
                ret = -ENOTCONN;
                goto out;
        }

        vq = fsvq->vq;
        ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
        if (ret < 0) {
                spin_unlock(&fsvq->lock);
                goto out;
        }

        /* Request successfully sent. */
        fpq = &fsvq->fud->pq;
        spin_lock(&fpq->lock);
        list_add_tail(&req->list, fpq->processing);
        spin_unlock(&fpq->lock);
        set_bit(FR_SENT, &req->flags);
        /* matches barrier in request_wait_answer() */
        smp_mb__after_atomic();

        if (!in_flight)
                inc_in_flight_req(fsvq);
        notify = virtqueue_kick_prepare(vq);

        spin_unlock(&fsvq->lock);

        if (notify)
                virtqueue_notify(vq);

out:
        if (ret < 0 && req->argbuf) {
                kfree(req->argbuf);
                req->argbuf = NULL;
        }
        if (sgs != stack_sgs) {
                kfree(sgs);
                kfree(sg);
        }

        return ret;
}

static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
        unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
        struct virtio_fs *fs;
        struct fuse_req *req;
        struct virtio_fs_vq *fsvq;
        int ret;

        WARN_ON(list_empty(&fiq->pending));
        req = list_last_entry(&fiq->pending, struct fuse_req, list);
        clear_bit(FR_PENDING, &req->flags);
        list_del_init(&req->list);
        WARN_ON(!list_empty(&fiq->pending));
        spin_unlock(&fiq->lock);

        fs = fiq->priv;

        pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
                 __func__, req->in.h.opcode, req->in.h.unique,
                 req->in.h.nodeid, req->in.h.len,
                 fuse_len_args(req->args->out_numargs, req->args->out_args));

        fsvq = &fs->vqs[queue_id];
        ret = virtio_fs_enqueue_req(fsvq, req, false);
        if (ret < 0) {
                if (ret == -ENOMEM || ret == -ENOSPC) {
                        /*
                         * Virtqueue full. Retry submission from worker
                         * context as we might be holding fc->bg_lock.
                         */
                        spin_lock(&fsvq->lock);
                        list_add_tail(&req->list, &fsvq->queued_reqs);
                        inc_in_flight_req(fsvq);
                        schedule_delayed_work(&fsvq->dispatch_work,
                                                msecs_to_jiffies(1));
                        spin_unlock(&fsvq->lock);
                        return;
                }
                req->out.h.error = ret;
                pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);

                /* Can't end request in submission context. Use a worker */
                spin_lock(&fsvq->lock);
                list_add_tail(&req->list, &fsvq->end_reqs);
                schedule_delayed_work(&fsvq->dispatch_work, 0);
                spin_unlock(&fsvq->lock);
                return;
        }
}

static const struct fuse_iqueue_ops virtio_fs_fiq_ops = {
        .wake_forget_and_unlock         = virtio_fs_wake_forget_and_unlock,
        .wake_interrupt_and_unlock      = virtio_fs_wake_interrupt_and_unlock,
        .wake_pending_and_unlock        = virtio_fs_wake_pending_and_unlock,
        .release                        = virtio_fs_fiq_release,
};

static int virtio_fs_fill_super(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *fs = fc->iq.priv;
        unsigned int i;
        int err;
        struct fuse_fs_context ctx = {
                .rootmode = S_IFDIR,
                .default_permissions = 1,
                .allow_other = 1,
                .max_read = UINT_MAX,
                .blksize = 512,
                .destroy = true,
                .no_control = true,
                .no_force_umount = true,
                .no_mount_options = true,
        };

        mutex_lock(&virtio_fs_mutex);

        /* After taking the mutex, make sure the virtiofs device is still
         * there. Though we are holding a reference to it, the driver's
         * ->remove might still have cleaned up the virtqueues. In that
         * case bail out.
         */
        err = -EINVAL;
        if (list_empty(&fs->list)) {
                pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
                goto err;
        }

        err = -ENOMEM;
        /* Allocate fuse_dev for hiprio and notification queues */
        for (i = 0; i < VQ_REQUEST; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                fsvq->fud = fuse_dev_alloc();
                if (!fsvq->fud)
                        goto err_free_fuse_devs;
        }

        ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
        err = fuse_fill_super_common(sb, &ctx);
        if (err < 0)
                goto err_free_fuse_devs;

        fc = fs->vqs[VQ_REQUEST].fud->fc;

        for (i = 0; i < fs->nvqs; i++) {
                struct virtio_fs_vq *fsvq = &fs->vqs[i];

                if (i == VQ_REQUEST)
                        continue; /* already initialized */
                fuse_dev_install(fsvq->fud, fc);
        }

        /* A previous unmount will have stopped all queues. Start these again */
        virtio_fs_start_all_queues(fs);
        fuse_send_init(fc);
        mutex_unlock(&virtio_fs_mutex);
        return 0;

err_free_fuse_devs:
        virtio_fs_free_devs(fs);
err:
        mutex_unlock(&virtio_fs_mutex);
        return err;
}

static void virtio_kill_sb(struct super_block *sb)
{
        struct fuse_conn *fc = get_fuse_conn_super(sb);
        struct virtio_fs *vfs;
        struct virtio_fs_vq *fsvq;

        /* If mount failed, we can still be called without any fc */
        if (!fc)
                return fuse_kill_sb_anon(sb);

        vfs = fc->iq.priv;
        fsvq = &vfs->vqs[VQ_HIPRIO];

        /* Stop forget queue. Soon destroy will be sent */
        spin_lock(&fsvq->lock);
        fsvq->connected = false;
        spin_unlock(&fsvq->lock);
        virtio_fs_drain_all_queues(vfs);

        fuse_kill_sb_anon(sb);

        /* fuse_kill_sb_anon() must have sent destroy. Stop all queues,
         * drain one more time, and free the fuse devices. Freeing fuse
         * devices will drop their reference on fuse_conn, and that in
         * turn will drop its reference on the virtio_fs object.
         */
        virtio_fs_stop_all_queues(vfs);
        virtio_fs_drain_all_queues(vfs);
        virtio_fs_free_devs(vfs);
}

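/* sget_fc() callbacks: superblocks match if they refer to the same virtio_fs
 * instance.
 */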
static int virtio_fs_test_super(struct super_block *sb,
                                struct fs_context *fsc)
{
        struct fuse_conn *fc = fsc->s_fs_info;

        return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
}

static int virtio_fs_set_super(struct super_block *sb,
                               struct fs_context *fsc)
{
        int err;

        err = get_anon_bdev(&sb->s_dev);
        if (!err)
                fuse_conn_get(fsc->s_fs_info);

        return err;
}

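/* Look up the virtio_fs instance by tag (fsc->source) and create or reuse a
 * superblock for it.
 */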
static int virtio_fs_get_tree(struct fs_context *fsc)
{
        struct virtio_fs *fs;
        struct super_block *sb;
        struct fuse_conn *fc;
        int err;

        /* This gets a reference on the virtio_fs object. The pointer is
         * installed in fc->iq.priv. Once the fuse_conn is going away, it
         * calls ->release() to drop the reference to this object.
         */
        fs = virtio_fs_find_instance(fsc->source);
        if (!fs) {
                pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
                return -EINVAL;
        }

        fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
        if (!fc) {
                mutex_lock(&virtio_fs_mutex);
                virtio_fs_put(fs);
                mutex_unlock(&virtio_fs_mutex);
                return -ENOMEM;
        }

        fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
                       fs);
        fc->release = fuse_free_conn;
        fc->delete_stale = true;

        fsc->s_fs_info = fc;
        sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
        fuse_conn_put(fc);
        if (IS_ERR(sb))
                return PTR_ERR(sb);

        if (!sb->s_root) {
                err = virtio_fs_fill_super(sb);
                if (err) {
                        deactivate_locked_super(sb);
                        return err;
                }

                sb->s_flags |= SB_ACTIVE;
        }

        WARN_ON(fsc->root);
        fsc->root = dget(sb->s_root);
        return 0;
}

static const struct fs_context_operations virtio_fs_context_ops = {
        .get_tree       = virtio_fs_get_tree,
};

static int virtio_fs_init_fs_context(struct fs_context *fsc)
{
        fsc->ops = &virtio_fs_context_ops;
        return 0;
}

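/* Userspace mounts an instance by its device tag, e.g. (assuming a device
 * tagged "myfs" and a hypothetical mount point):
 *
 *     mount -t virtiofs myfs /mnt
 */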
static struct file_system_type virtio_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "virtiofs",
        .init_fs_context = virtio_fs_init_fs_context,
        .kill_sb        = virtio_kill_sb,
};

static int __init virtio_fs_init(void)
{
        int ret;

        ret = register_virtio_driver(&virtio_fs_driver);
        if (ret < 0)
                return ret;

        ret = register_filesystem(&virtio_fs_type);
        if (ret < 0) {
                unregister_virtio_driver(&virtio_fs_driver);
                return ret;
        }

        return 0;
}
module_init(virtio_fs_init);

static void __exit virtio_fs_exit(void)
{
        unregister_filesystem(&virtio_fs_type);
        unregister_virtio_driver(&virtio_fs_driver);
}
module_exit(virtio_fs_exit);

MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
MODULE_DESCRIPTION("Virtio Filesystem");
MODULE_LICENSE("GPL");
MODULE_ALIAS_FS(KBUILD_MODNAME);
MODULE_DEVICE_TABLE(virtio, id_table);