fs/nfsd/filecache.c

   1 /*
   2  * Open file cache.
   3  *
   4  * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
   5  */
   6
   7 #include <linux/hash.h>
   8 #include <linux/slab.h>
   9 #include <linux/file.h>
  10 #include <linux/sched.h>
  11 #include <linux/list_lru.h>
  12 #include <linux/fsnotify_backend.h>
  13 #include <linux/fsnotify.h>
  14 #include <linux/seq_file.h>
  15
  16 #include "vfs.h"
  17 #include "nfsd.h"
  18 #include "nfsfh.h"
  19 #include "netns.h"
  20 #include "filecache.h"
  21 #include "trace.h"
  22
#define NFSDDBG_FACILITY        NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
/* Number of hash bits; the table has 1 << NFSD_FILE_HASH_BITS buckets. */
#define NFSD_FILE_HASH_BITS                   12
#define NFSD_FILE_HASH_SIZE                  (1 << NFSD_FILE_HASH_BITS)
/* Delay used when queueing the laundrette while at or below the threshold */
#define NFSD_LAUNDRETTE_DELAY                (2 * HZ)

/* Bit numbers within nfsd_file_lru_flags */
#define NFSD_FILE_LRU_RESCAN                 (0)
#define NFSD_FILE_SHUTDOWN                   (1)
/* Cached entry count above which the laundrette is queued with no delay */
#define NFSD_FILE_LRU_THRESHOLD              (4096UL)
/* Cached entry count at which a MAY_FLUSH caller waits for the laundrette */
#define NFSD_FILE_LRU_LIMIT                  (NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK      (NFSD_MAY_READ|NFSD_MAY_WRITE)
  37
/*
 * One bucket of the open file hash table. Insertions and removals on
 * nfb_head are done with nfb_lock held; lookups may also walk the chain
 * under rcu_read_lock().
 */
struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;	/* chain of nfsd_file, linked via nf_node */
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;	/* entries currently hashed in this bucket */
	unsigned int		nfb_maxcount;	/* highest nfb_count seen so far */
};
  44
/* Per-cpu count of lookups satisfied by nfsd_file_acquire() */
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
/* Array of NFSD_FILE_HASH_SIZE buckets; NULL while the cache is not set up */
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
/* Holds the NFSD_FILE_LRU_RESCAN and NFSD_FILE_SHUTDOWN bits */
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
/* Total number of hashed entries across all buckets */
static atomic_long_t			nfsd_filecache_count;
/* Delayed work that runs nfsd_file_delayed_close() */
static struct delayed_work		nfsd_filecache_laundrette;
  55
/*
 * Tells nfsd_file_schedule_laundrette() whether the caller may be made to
 * wait for the laundrette to finish when the cache is over its limit.
 */
enum nfsd_file_laundrette_ctl {
	NFSD_FILE_LAUNDRETTE_NOFLUSH = 0,	/* only queue the work */
	NFSD_FILE_LAUNDRETTE_MAY_FLUSH		/* queue, and flush if at the limit */
};
  60
  61 static void
  62 nfsd_file_schedule_laundrette(enum nfsd_file_laundrette_ctl ctl)
  63 {
  64         long count = atomic_long_read(&nfsd_filecache_count);
  65
  66         if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
  67                 return;
  68
  69         /* Be more aggressive about scanning if over the threshold */
  70         if (count > NFSD_FILE_LRU_THRESHOLD)
  71                 mod_delayed_work(system_wq, &nfsd_filecache_laundrette, 0);
  72         else
  73                 schedule_delayed_work(&nfsd_filecache_laundrette, NFSD_LAUNDRETTE_DELAY);
  74
  75         if (ctl == NFSD_FILE_LAUNDRETTE_NOFLUSH)
  76                 return;
  77
  78         /* ...and don't delay flushing if we're out of control */
  79         if (count >= NFSD_FILE_LRU_LIMIT)
  80                 flush_delayed_work(&nfsd_filecache_laundrette);
  81 }
  82
  83 static void
  84 nfsd_file_slab_free(struct rcu_head *rcu)
  85 {
  86         struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);
  87
  88         put_cred(nf->nf_cred);
  89         kmem_cache_free(nfsd_file_slab, nf);
  90 }
  91
  92 static void
  93 nfsd_file_mark_free(struct fsnotify_mark *mark)
  94 {
  95         struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
  96                                                   nfm_mark);
  97
  98         kmem_cache_free(nfsd_file_mark_slab, nfm);
  99 }
 100
 101 static struct nfsd_file_mark *
 102 nfsd_file_mark_get(struct nfsd_file_mark *nfm)
 103 {
 104         if (!atomic_inc_not_zero(&nfm->nfm_ref))
 105                 return NULL;
 106         return nfm;
 107 }
 108
 109 static void
 110 nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 111 {
 112         if (atomic_dec_and_test(&nfm->nfm_ref)) {
 113
 114                 fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
 115                 fsnotify_put_mark(&nfm->nfm_mark);
 116         }
 117 }
 118
 119 static struct nfsd_file_mark *
 120 nfsd_file_mark_find_or_create(struct nfsd_file *nf)
 121 {
 122         int                     err;
 123         struct fsnotify_mark    *mark;
 124         struct nfsd_file_mark   *nfm = NULL, *new;
 125         struct inode *inode = nf->nf_inode;
 126
 127         do {
 128                 mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
 129                 mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
 130                                 nfsd_file_fsnotify_group);
 131                 if (mark) {
 132                         nfm = nfsd_file_mark_get(container_of(mark,
 133                                                  struct nfsd_file_mark,
 134                                                  nfm_mark));
 135                         mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
 136                         fsnotify_put_mark(mark);
 137                         if (likely(nfm))
 138                                 break;
 139                 } else
 140                         mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
 141
 142                 /* allocate a new nfm */
 143                 new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
 144                 if (!new)
 145                         return NULL;
 146                 fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
 147                 new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
 148                 atomic_set(&new->nfm_ref, 1);
 149
 150                 err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);
 151
 152                 /*
 153                  * If the add was successful, then return the object.
 154                  * Otherwise, we need to put the reference we hold on the
 155                  * nfm_mark. The fsnotify code will take a reference and put
 156                  * it on failure, so we can't just free it directly. It's also
 157                  * not safe to call fsnotify_destroy_mark on it as the
 158                  * mark->group will be NULL. Thus, we can't let the nfm_ref
 159                  * counter drive the destruction at this point.
 160                  */
 161                 if (likely(!err))
 162                         nfm = new;
 163                 else
 164                         fsnotify_put_mark(&new->nfm_mark);
 165         } while (unlikely(err == -EEXIST));
 166
 167         return nfm;
 168 }
 169
 170 static struct nfsd_file *
 171 nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
 172                 struct net *net)
 173 {
 174         struct nfsd_file *nf;
 175
 176         nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
 177         if (nf) {
 178                 INIT_HLIST_NODE(&nf->nf_node);
 179                 INIT_LIST_HEAD(&nf->nf_lru);
 180                 nf->nf_file = NULL;
 181                 nf->nf_cred = get_current_cred();
 182                 nf->nf_net = net;
 183                 nf->nf_flags = 0;
 184                 nf->nf_inode = inode;
 185                 nf->nf_hashval = hashval;
 186                 atomic_set(&nf->nf_ref, 1);
 187                 nf->nf_may = may & NFSD_FILE_MAY_MASK;
 188                 if (may & NFSD_MAY_NOT_BREAK_LEASE) {
 189                         if (may & NFSD_MAY_WRITE)
 190                                 __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
 191                         if (may & NFSD_MAY_READ)
 192                                 __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
 193                 }
 194                 nf->nf_mark = NULL;
 195                 trace_nfsd_file_alloc(nf);
 196         }
 197         return nf;
 198 }
 199
 200 static bool
 201 nfsd_file_free(struct nfsd_file *nf)
 202 {
 203         bool flush = false;
 204
 205         trace_nfsd_file_put_final(nf);
 206         if (nf->nf_mark)
 207                 nfsd_file_mark_put(nf->nf_mark);
 208         if (nf->nf_file) {
 209                 get_file(nf->nf_file);
 210                 filp_close(nf->nf_file, NULL);
 211                 fput(nf->nf_file);
 212                 flush = true;
 213         }
 214         call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
 215         return flush;
 216 }
 217
 218 static bool
 219 nfsd_file_check_writeback(struct nfsd_file *nf)
 220 {
 221         struct file *file = nf->nf_file;
 222         struct address_space *mapping;
 223
 224         if (!file || !(file->f_mode & FMODE_WRITE))
 225                 return false;
 226         mapping = file->f_mapping;
 227         return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
 228                 mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
 229 }
 230
 231 static int
 232 nfsd_file_check_write_error(struct nfsd_file *nf)
 233 {
 234         struct file *file = nf->nf_file;
 235
 236         if (!file || !(file->f_mode & FMODE_WRITE))
 237                 return 0;
 238         return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
 239 }
 240
 241 static bool
 242 nfsd_file_in_use(struct nfsd_file *nf)
 243 {
 244         return nfsd_file_check_writeback(nf) ||
 245                         nfsd_file_check_write_error(nf);
 246 }
 247
 248 static void
 249 nfsd_file_do_unhash(struct nfsd_file *nf)
 250 {
 251         lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
 252
 253         trace_nfsd_file_unhash(nf);
 254
 255         if (nfsd_file_check_write_error(nf))
 256                 nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
 257         --nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
 258         hlist_del_rcu(&nf->nf_node);
 259         atomic_long_dec(&nfsd_filecache_count);
 260 }
 261
 262 static bool
 263 nfsd_file_unhash(struct nfsd_file *nf)
 264 {
 265         if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
 266                 nfsd_file_do_unhash(nf);
 267                 if (!list_empty(&nf->nf_lru))
 268                         list_lru_del(&nfsd_file_lru, &nf->nf_lru);
 269                 return true;
 270         }
 271         return false;
 272 }
 273
 274 /*
 275  * Return true if the file was unhashed.
 276  */
 277 static bool
 278 nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
 279 {
 280         lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
 281
 282         trace_nfsd_file_unhash_and_release_locked(nf);
 283         if (!nfsd_file_unhash(nf))
 284                 return false;
 285         /* keep final reference for nfsd_file_lru_dispose */
 286         if (atomic_add_unless(&nf->nf_ref, -1, 1))
 287                 return true;
 288
 289         list_add(&nf->nf_lru, dispose);
 290         return true;
 291 }
 292
 293 static int
 294 nfsd_file_put_noref(struct nfsd_file *nf)
 295 {
 296         int count;
 297         trace_nfsd_file_put(nf);
 298
 299         count = atomic_dec_return(&nf->nf_ref);
 300         if (!count) {
 301                 WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
 302                 nfsd_file_free(nf);
 303         }
 304         return count;
 305 }
 306
 307 void
 308 nfsd_file_put(struct nfsd_file *nf)
 309 {
 310         bool is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
 311         bool unused = !nfsd_file_in_use(nf);
 312
 313         set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
 314         if (nfsd_file_put_noref(nf) == 1 && is_hashed && unused)
 315                 nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_MAY_FLUSH);
 316 }
 317
 318 struct nfsd_file *
 319 nfsd_file_get(struct nfsd_file *nf)
 320 {
 321         if (likely(atomic_inc_not_zero(&nf->nf_ref)))
 322                 return nf;
 323         return NULL;
 324 }
 325
 326 static void
 327 nfsd_file_dispose_list(struct list_head *dispose)
 328 {
 329         struct nfsd_file *nf;
 330
 331         while(!list_empty(dispose)) {
 332                 nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
 333                 list_del(&nf->nf_lru);
 334                 nfsd_file_put_noref(nf);
 335         }
 336 }
 337
 338 static void
 339 nfsd_file_dispose_list_sync(struct list_head *dispose)
 340 {
 341         bool flush = false;
 342         struct nfsd_file *nf;
 343
 344         while(!list_empty(dispose)) {
 345                 nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
 346                 list_del(&nf->nf_lru);
 347                 if (!atomic_dec_and_test(&nf->nf_ref))
 348                         continue;
 349                 if (nfsd_file_free(nf))
 350                         flush = true;
 351         }
 352         if (flush)
 353                 flush_delayed_fput();
 354 }
 355
/*
 * list_lru walk callback deciding whether one LRU entry can be reclaimed.
 * @item: the nf_lru member of the nfsd_file being examined
 * @lru: the list_lru_one that @item sits on
 * @lock: lock protecting @lru (not used directly here)
 * @arg: list head onto which reclaimable entries are moved
 *
 * Returns LRU_REMOVED after moving an entry to @arg with its
 * NFSD_FILE_HASHED bit cleared, or LRU_SKIP when the entry is left alone.
 * The entry is not removed from its hash chain here; that is left to
 * nfsd_file_lru_dispose().
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (atomic_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O. Note that
	 * only dirty/writeback state is tested here; pending write errors
	 * are not checked at this point.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	/* Used since the last scan: give it another round, ask for a rescan */
	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_rescan;

	/* Someone else already cleared HASHED and owns the teardown */
	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_rescan:
	set_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags);
out_skip:
	return LRU_SKIP;
}
 401
 402 static void
 403 nfsd_file_lru_dispose(struct list_head *head)
 404 {
 405         struct nfsd_file *nf;
 406
 407         list_for_each_entry(nf, head, nf_lru) {
 408                 spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
 409                 nfsd_file_do_unhash(nf);
 410                 spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
 411         }
 412         nfsd_file_dispose_list(head);
 413 }
 414
 415 static unsigned long
 416 nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
 417 {
 418         return list_lru_count(&nfsd_file_lru);
 419 }
 420
 421 static unsigned long
 422 nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 423 {
 424         LIST_HEAD(head);
 425         unsigned long ret;
 426
 427         ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &head);
 428         nfsd_file_lru_dispose(&head);
 429         return ret;
 430 }
 431
/* Memory shrinker that reclaims idle entries from nfsd_file_lru */
static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};
 437
 438 static void
 439 __nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
 440                         struct list_head *dispose)
 441 {
 442         struct nfsd_file        *nf;
 443         struct hlist_node       *tmp;
 444
 445         spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
 446         hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
 447                 if (inode == nf->nf_inode)
 448                         nfsd_file_unhash_and_release_locked(nf, dispose);
 449         }
 450         spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
 451 }
 452
 453 /**
 454  * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 455  * @inode: inode of the file to attempt to remove
 456  *
 457  * Walk the whole hash bucket, looking for any files that correspond to "inode".
 458  * If any do, then unhash them and put the hashtable reference to them and
 459  * destroy any that had their last reference put. Also ensure that any of the
 460  * fputs also have their final __fput done as well.
 461  */
 462 void
 463 nfsd_file_close_inode_sync(struct inode *inode)
 464 {
 465         unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
 466                                                 NFSD_FILE_HASH_BITS);
 467         LIST_HEAD(dispose);
 468
 469         __nfsd_file_close_inode(inode, hashval, &dispose);
 470         trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
 471         nfsd_file_dispose_list_sync(&dispose);
 472 }
 473
 474 /**
 475  * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 476  * @inode: inode of the file to attempt to remove
 477  *
 478  * Walk the whole hash bucket, looking for any files that correspond to "inode".
 479  * If any do, then unhash them and put the hashtable reference to them and
 480  * destroy any that had their last reference put.
 481  */
 482 static void
 483 nfsd_file_close_inode(struct inode *inode)
 484 {
 485         unsigned int            hashval = (unsigned int)hash_long(inode->i_ino,
 486                                                 NFSD_FILE_HASH_BITS);
 487         LIST_HEAD(dispose);
 488
 489         __nfsd_file_close_inode(inode, hashval, &dispose);
 490         trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
 491         nfsd_file_dispose_list(&dispose);
 492 }
 493
 494 /**
 495  * nfsd_file_delayed_close - close unused nfsd_files
 496  * @work: dummy
 497  *
 498  * Walk the LRU list and close any entries that have not been used since
 499  * the last scan.
 500  *
 501  * Note this can deadlock with nfsd_file_cache_purge.
 502  */
 503 static void
 504 nfsd_file_delayed_close(struct work_struct *work)
 505 {
 506         LIST_HEAD(head);
 507
 508         list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &head, LONG_MAX);
 509
 510         if (test_and_clear_bit(NFSD_FILE_LRU_RESCAN, &nfsd_file_lru_flags))
 511                 nfsd_file_schedule_laundrette(NFSD_FILE_LAUNDRETTE_NOFLUSH);
 512
 513         if (!list_empty(&head)) {
 514                 nfsd_file_lru_dispose(&head);
 515                 flush_delayed_fput();
 516         }
 517 }
 518
 519 static int
 520 nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
 521                             void *data)
 522 {
 523         struct file_lock *fl = data;
 524
 525         /* Only close files for F_SETLEASE leases */
 526         if (fl->fl_flags & FL_LEASE)
 527                 nfsd_file_close_inode_sync(file_inode(fl->fl_file));
 528         return 0;
 529 }
 530
/* Registered with lease_register_notifier() in nfsd_file_cache_init() */
static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};
 534
 535 static int
 536 nfsd_file_fsnotify_handle_event(struct fsnotify_group *group,
 537                                 struct inode *inode,
 538                                 u32 mask, const void *data, int data_type,
 539                                 const struct qstr *file_name, u32 cookie,
 540                                 struct fsnotify_iter_info *iter_info)
 541 {
 542         trace_nfsd_file_fsnotify_handle_event(inode, mask);
 543
 544         /* Should be no marks on non-regular files */
 545         if (!S_ISREG(inode->i_mode)) {
 546                 WARN_ON_ONCE(1);
 547                 return 0;
 548         }
 549
 550         /* don't close files if this was not the last link */
 551         if (mask & FS_ATTRIB) {
 552                 if (inode->i_nlink)
 553                         return 0;
 554         }
 555
 556         nfsd_file_close_inode(inode);
 557         return 0;
 558 }
 559
 560
/* Operations for the fsnotify group allocated in nfsd_file_cache_init() */
static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
 565
 566 int
 567 nfsd_file_cache_init(void)
 568 {
 569         int             ret = -ENOMEM;
 570         unsigned int    i;
 571
 572         clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);
 573
 574         if (nfsd_file_hashtbl)
 575                 return 0;
 576
 577         nfsd_file_hashtbl = kcalloc(NFSD_FILE_HASH_SIZE,
 578                                 sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
 579         if (!nfsd_file_hashtbl) {
 580                 pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
 581                 goto out_err;
 582         }
 583
 584         nfsd_file_slab = kmem_cache_create("nfsd_file",
 585                                 sizeof(struct nfsd_file), 0, 0, NULL);
 586         if (!nfsd_file_slab) {
 587                 pr_err("nfsd: unable to create nfsd_file_slab\n");
 588                 goto out_err;
 589         }
 590
 591         nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
 592                                         sizeof(struct nfsd_file_mark), 0, 0, NULL);
 593         if (!nfsd_file_mark_slab) {
 594                 pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
 595                 goto out_err;
 596         }
 597
 598
 599         ret = list_lru_init(&nfsd_file_lru);
 600         if (ret) {
 601                 pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
 602                 goto out_err;
 603         }
 604
 605         ret = register_shrinker(&nfsd_file_shrinker);
 606         if (ret) {
 607                 pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
 608                 goto out_lru;
 609         }
 610
 611         ret = lease_register_notifier(&nfsd_file_lease_notifier);
 612         if (ret) {
 613                 pr_err("nfsd: unable to register lease notifier: %d\n", ret);
 614                 goto out_shrinker;
 615         }
 616
 617         nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
 618         if (IS_ERR(nfsd_file_fsnotify_group)) {
 619                 pr_err("nfsd: unable to create fsnotify group: %ld\n",
 620                         PTR_ERR(nfsd_file_fsnotify_group));
 621                 nfsd_file_fsnotify_group = NULL;
 622                 goto out_notifier;
 623         }
 624
 625         for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
 626                 INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
 627                 spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
 628         }
 629
 630         INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_delayed_close);
 631 out:
 632         return ret;
 633 out_notifier:
 634         lease_unregister_notifier(&nfsd_file_lease_notifier);
 635 out_shrinker:
 636         unregister_shrinker(&nfsd_file_shrinker);
 637 out_lru:
 638         list_lru_destroy(&nfsd_file_lru);
 639 out_err:
 640         kmem_cache_destroy(nfsd_file_slab);
 641         nfsd_file_slab = NULL;
 642         kmem_cache_destroy(nfsd_file_mark_slab);
 643         nfsd_file_mark_slab = NULL;
 644         kfree(nfsd_file_hashtbl);
 645         nfsd_file_hashtbl = NULL;
 646         goto out;
 647 }
 648
 649 /*
 650  * Note this can deadlock with nfsd_file_lru_cb.
 651  */
 652 void
 653 nfsd_file_cache_purge(struct net *net)
 654 {
 655         unsigned int            i;
 656         struct nfsd_file        *nf;
 657         struct hlist_node       *next;
 658         LIST_HEAD(dispose);
 659         bool del;
 660
 661         if (!nfsd_file_hashtbl)
 662                 return;
 663
 664         for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
 665                 struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];
 666
 667                 spin_lock(&nfb->nfb_lock);
 668                 hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
 669                         if (net && nf->nf_net != net)
 670                                 continue;
 671                         del = nfsd_file_unhash_and_release_locked(nf, &dispose);
 672
 673                         /*
 674                          * Deadlock detected! Something marked this entry as
 675                          * unhased, but hasn't removed it from the hash list.
 676                          */
 677                         WARN_ON_ONCE(!del);
 678                 }
 679                 spin_unlock(&nfb->nfb_lock);
 680                 nfsd_file_dispose_list(&dispose);
 681         }
 682 }
 683
/*
 * Tear down the open file cache: stop new laundrette runs, unregister the
 * notifier and shrinker, purge all entries and free the cache's resources.
 * The order of the steps below is significant.
 */
void
nfsd_file_cache_shutdown(void)
{
	/* Makes nfsd_file_schedule_laundrette() a no-op from here on */
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	/*
	 * nfsd_file_free() frees objects via call_rcu(); wait for those
	 * callbacks before the nfsd_file slab is destroyed below.
	 */
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	/*
	 * Marks are returned to nfsd_file_mark_slab by nfsd_file_mark_free();
	 * presumably this wait ensures all of them have been freed before the
	 * slab goes away.
	 */
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
}
 709
 710 static bool
 711 nfsd_match_cred(const struct cred *c1, const struct cred *c2)
 712 {
 713         int i;
 714
 715         if (!uid_eq(c1->fsuid, c2->fsuid))
 716                 return false;
 717         if (!gid_eq(c1->fsgid, c2->fsgid))
 718                 return false;
 719         if (c1->group_info == NULL || c2->group_info == NULL)
 720                 return c1->group_info == c2->group_info;
 721         if (c1->group_info->ngroups != c2->group_info->ngroups)
 722                 return false;
 723         for (i = 0; i < c1->group_info->ngroups; i++) {
 724                 if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
 725                         return false;
 726         }
 727         return true;
 728 }
 729
 730 static struct nfsd_file *
 731 nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
 732                         unsigned int hashval, struct net *net)
 733 {
 734         struct nfsd_file *nf;
 735         unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
 736
 737         hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
 738                                  nf_node) {
 739                 if ((need & nf->nf_may) != need)
 740                         continue;
 741                 if (nf->nf_inode != inode)
 742                         continue;
 743                 if (nf->nf_net != net)
 744                         continue;
 745                 if (!nfsd_match_cred(nf->nf_cred, current_cred()))
 746                         continue;
 747                 if (nfsd_file_get(nf) != NULL)
 748                         return nf;
 749         }
 750         return NULL;
 751 }
 752
 753 /**
 754  * nfsd_file_is_cached - are there any cached open files for this fh?
 755  * @inode: inode of the file to check
 756  *
 757  * Scan the hashtable for open files that match this fh. Returns true if there
 758  * are any, and false if not.
 759  */
 760 bool
 761 nfsd_file_is_cached(struct inode *inode)
 762 {
 763         bool                    ret = false;
 764         struct nfsd_file        *nf;
 765         unsigned int            hashval;
 766
 767         hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
 768
 769         rcu_read_lock();
 770         hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
 771                                  nf_node) {
 772                 if (inode == nf->nf_inode) {
 773                         ret = true;
 774                         break;
 775                 }
 776         }
 777         rcu_read_unlock();
 778         trace_nfsd_file_is_cached(inode, hashval, (int)ret);
 779         return ret;
 780 }
 781
/**
 * nfsd_file_acquire - find or open the cached nfsd_file for a filehandle
 * @rqstp: the RPC request being processed
 * @fhp: filehandle to open; checked with fh_verify() as a regular file
 * @may_flags: NFSD_MAY_* access flags for the open
 * @pnf: on success, set to the referenced nfsd_file
 *
 * Looks for a matching entry in the hash table. If none exists, a new one
 * is allocated, hashed with NFSD_FILE_PENDING set, and the file is opened.
 * Tasks that find an existing entry wait for NFSD_FILE_PENDING to clear
 * before using it; if construction failed the lookup is retried once.
 *
 * Returns nfs_ok with *@pnf set, or an nfserr status with *@pnf untouched.
 * The reference returned through @pnf is presumably released by the caller
 * with nfsd_file_put() - confirm against callers.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	/* Fast path: lockless lookup under RCU */
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	/* Allocate before taking the bucket spinlock (GFP_KERNEL allocation) */
	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	/* Repeat the lookup under the lock in case another task inserted one */
	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	/* Lost the race: "new" was never hashed, so free it directly */
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		/* Retry the whole lookup, but only once */
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	/*
	 * The entry may have been opened with NFSD_MAY_NOT_BREAK_LEASE, in
	 * which case nfsd_file_alloc() recorded the skipped lease breaks in
	 * the BREAK_READ/BREAK_WRITE flags. Do the break now if this caller
	 * needs it, and clear the flags once it succeeds.
	 */
	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		/* Drop the caller's reference on failure */
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	/* Entered with the bucket lock held and no matching entry hashed */
	nf = new;
	/* Take reference for the hashtable */
	atomic_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	atomic_long_inc(&nfsd_filecache_count);

	/* The open is only attempted once a notification mark is in place */
	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		/* We did the unhashing, so drop the hashtable's reference */
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	/* Construction is finished: release anyone waiting on PENDING */
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}
 900
 901 /*
 902  * Note that fields may be added, removed or reordered in the future. Programs
 903  * scraping this file for info should test the labels to ensure they're
 904  * getting the correct field.
 905  */
 906 static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
 907 {
 908         unsigned int i, count = 0, longest = 0;
 909         unsigned long hits = 0;
 910
 911         /*
 912          * No need for spinlocks here since we're not terribly interested in
 913          * accuracy. We do take the nfsd_mutex simply to ensure that we
 914          * don't end up racing with server shutdown
 915          */
 916         mutex_lock(&nfsd_mutex);
 917         if (nfsd_file_hashtbl) {
 918                 for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
 919                         count += nfsd_file_hashtbl[i].nfb_count;
 920                         longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
 921                 }
 922         }
 923         mutex_unlock(&nfsd_mutex);
 924
 925         for_each_possible_cpu(i)
 926                 hits += per_cpu(nfsd_file_cache_hits, i);
 927
 928         seq_printf(m, "total entries: %u\n", count);
 929         seq_printf(m, "longest chain: %u\n", longest);
 930         seq_printf(m, "cache hits:    %lu\n", hits);
 931         return 0;
 932 }
 933
 934 int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
 935 {
 936         return single_open(file, nfsd_file_cache_stats_show, NULL);
 937 }