[linux.git] fs/gfs2/glock.c (commit "gfs2: Introduce function gfs2_withdrawn")
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4  * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
5  */
6
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <linux/buffer_head.h>
13 #include <linux/delay.h>
14 #include <linux/sort.h>
15 #include <linux/hash.h>
16 #include <linux/jhash.h>
17 #include <linux/kallsyms.h>
18 #include <linux/gfs2_ondisk.h>
19 #include <linux/list.h>
20 #include <linux/wait.h>
21 #include <linux/module.h>
22 #include <linux/uaccess.h>
23 #include <linux/seq_file.h>
24 #include <linux/debugfs.h>
25 #include <linux/kthread.h>
26 #include <linux/freezer.h>
27 #include <linux/workqueue.h>
28 #include <linux/jiffies.h>
29 #include <linux/rcupdate.h>
30 #include <linux/rculist_bl.h>
31 #include <linux/bit_spinlock.h>
32 #include <linux/percpu.h>
33 #include <linux/list_sort.h>
34 #include <linux/lockref.h>
35 #include <linux/rhashtable.h>
36
37 #include "gfs2.h"
38 #include "incore.h"
39 #include "glock.h"
40 #include "glops.h"
41 #include "inode.h"
42 #include "lops.h"
43 #include "meta_io.h"
44 #include "quota.h"
45 #include "super.h"
46 #include "util.h"
47 #include "bmap.h"
48 #define CREATE_TRACE_POINTS
49 #include "trace_gfs2.h"
50
51 struct gfs2_glock_iter {
52         struct gfs2_sbd *sdp;           /* incore superblock           */
53         struct rhashtable_iter hti;     /* rhashtable iterator         */
54         struct gfs2_glock *gl;          /* current glock struct        */
55         loff_t last_pos;                /* last position               */
56 };
57
58 typedef void (*glock_examiner) (struct gfs2_glock * gl);
59
60 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
61
62 static struct dentry *gfs2_root;
63 static struct workqueue_struct *glock_workqueue;
64 struct workqueue_struct *gfs2_delete_workqueue;
65 static LIST_HEAD(lru_list);
66 static atomic_t lru_count = ATOMIC_INIT(0);
67 static DEFINE_SPINLOCK(lru_lock);
68
69 #define GFS2_GL_HASH_SHIFT      15
70 #define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
71
72 static const struct rhashtable_params ht_parms = {
73         .nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
74         .key_len = offsetofend(struct lm_lockname, ln_type),
75         .key_offset = offsetof(struct gfs2_glock, gl_name),
76         .head_offset = offsetof(struct gfs2_glock, gl_node),
77 };
78
79 static struct rhashtable gl_hash_table;
80
81 #define GLOCK_WAIT_TABLE_BITS 12
82 #define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
83 static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;
84
85 struct wait_glock_queue {
86         struct lm_lockname *name;
87         wait_queue_entry_t wait;
88 };
89
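/*
 * glock_wake_function - wake function used with the glock wait table
 *
 * Wait table entries are shared between all glocks whose names hash to the
 * same bucket, so only wake a waiter when the lock name passed in @key
 * matches the name that waiter is actually waiting for.
 */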
90 static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
91                                int sync, void *key)
92 {
93         struct wait_glock_queue *wait_glock =
94                 container_of(wait, struct wait_glock_queue, wait);
95         struct lm_lockname *wait_name = wait_glock->name;
96         struct lm_lockname *wake_name = key;
97
98         if (wake_name->ln_sbd != wait_name->ln_sbd ||
99             wake_name->ln_number != wait_name->ln_number ||
100             wake_name->ln_type != wait_name->ln_type)
101                 return 0;
102         return autoremove_wake_function(wait, mode, sync, key);
103 }
104
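/*
 * glock_waitqueue - map a lock name to its wait queue head
 *
 * Hashes the lock name (the same bytes used as the rhashtable key) to pick
 * one of the GLOCK_WAIT_TABLE_SIZE wait queues.
 */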
105 static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
106 {
107         u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);
108
109         return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
110 }
111
112 /**
113  * wake_up_glock  -  Wake up waiters on a glock
114  * @gl: the glock
115  */
116 static void wake_up_glock(struct gfs2_glock *gl)
117 {
118         wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);
119
120         if (waitqueue_active(wq))
121                 __wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
122 }
123
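/*
 * gfs2_glock_dealloc - RCU callback that frees a glock
 *
 * Glocks with an address space come from gfs2_glock_aspace_cachep; all
 * others come from gfs2_glock_cachep and may carry a separately allocated
 * lock value block (LVB).
 */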
124 static void gfs2_glock_dealloc(struct rcu_head *rcu)
125 {
126         struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
127
128         if (gl->gl_ops->go_flags & GLOF_ASPACE) {
129                 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
130         } else {
131                 kfree(gl->gl_lksb.sb_lvbptr);
132                 kmem_cache_free(gfs2_glock_cachep, gl);
133         }
134 }
135
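/*
 * gfs2_glock_free - remove a glock from the hash table and free it
 *
 * The glock must have no outstanding revokes.  Waiters in find_insert_glock()
 * are woken so they can re-check the hash table, and the actual freeing is
 * deferred to an RCU grace period via gfs2_glock_dealloc().
 */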
136 void gfs2_glock_free(struct gfs2_glock *gl)
137 {
138         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
139
140         BUG_ON(atomic_read(&gl->gl_revokes));
141         rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
142         smp_mb();
143         wake_up_glock(gl);
144         call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
145         if (atomic_dec_and_test(&sdp->sd_glock_disposal))
146                 wake_up(&sdp->sd_glock_wait);
147 }
148
149 /**
150  * gfs2_glock_hold() - increment reference count on glock
151  * @gl: The glock to hold
152  *
153  */
154
155 void gfs2_glock_hold(struct gfs2_glock *gl)
156 {
157         GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
158         lockref_get(&gl->gl_lockref);
159 }
160
161 /**
162  * demote_ok - Check to see if it's ok to unlock a glock
163  * @gl: the glock
164  *
165  * Returns: 1 if it's ok
166  */
167
168 static int demote_ok(const struct gfs2_glock *gl)
169 {
170         const struct gfs2_glock_operations *glops = gl->gl_ops;
171
172         if (gl->gl_state == LM_ST_UNLOCKED)
173                 return 0;
174         if (!list_empty(&gl->gl_holders))
175                 return 0;
176         if (glops->go_demote_ok)
177                 return glops->go_demote_ok(gl);
178         return 1;
179 }
180
181
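/*
 * gfs2_glock_add_to_lru - move a glock to the tail of the LRU list
 *
 * Only glock types marked GLOF_LRU take part in LRU reclaim.  The glock is
 * (re)inserted at the tail; lru_count is only bumped if GLF_LRU was not
 * already set.
 */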
182 void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
183 {
184         if (!(gl->gl_ops->go_flags & GLOF_LRU))
185                 return;
186
187         spin_lock(&lru_lock);
188
189         list_del(&gl->gl_lru);
190         list_add_tail(&gl->gl_lru, &lru_list);
191
192         if (!test_bit(GLF_LRU, &gl->gl_flags)) {
193                 set_bit(GLF_LRU, &gl->gl_flags);
194                 atomic_inc(&lru_count);
195         }
196
197         spin_unlock(&lru_lock);
198 }
199
200 static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
201 {
202         if (!(gl->gl_ops->go_flags & GLOF_LRU))
203                 return;
204
205         spin_lock(&lru_lock);
206         if (test_bit(GLF_LRU, &gl->gl_flags)) {
207                 list_del_init(&gl->gl_lru);
208                 atomic_dec(&lru_count);
209                 clear_bit(GLF_LRU, &gl->gl_flags);
210         }
211         spin_unlock(&lru_lock);
212 }
213
214 /*
215  * Enqueue the glock on the work queue.  Passes one glock reference on to the
216  * work queue.
217  */
218 static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
219         if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
220                 /*
221                  * We are holding the lockref spinlock, and the work was still
222                  * queued above.  The queued work (glock_work_func) takes that
223                  * spinlock before dropping its glock reference(s), so it
224                  * cannot have dropped them in the meantime.
225                  */
226                 GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
227                 gl->gl_lockref.count--;
228         }
229 }
230
231 static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
232         spin_lock(&gl->gl_lockref.lock);
233         __gfs2_glock_queue_work(gl, delay);
234         spin_unlock(&gl->gl_lockref.lock);
235 }
236
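/*
 * __gfs2_glock_put - release the final reference to a glock
 *
 * Called with gl_lockref.lock held and drops it.  Marks the lockref dead,
 * removes the glock from the LRU and hands it back to the lock module via
 * lm_put_lock().
 */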
237 static void __gfs2_glock_put(struct gfs2_glock *gl)
238 {
239         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
240         struct address_space *mapping = gfs2_glock2aspace(gl);
241
242         lockref_mark_dead(&gl->gl_lockref);
243
244         gfs2_glock_remove_from_lru(gl);
245         spin_unlock(&gl->gl_lockref.lock);
246         GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
247         GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
248         trace_gfs2_glock_put(gl);
249         sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
250 }
251
252 /*
253  * Cause the glock to be put in work queue context.
254  */
255 void gfs2_glock_queue_put(struct gfs2_glock *gl)
256 {
257         gfs2_glock_queue_work(gl, 0);
258 }
259
260 /**
261  * gfs2_glock_put() - Decrement reference count on glock
262  * @gl: The glock to put
263  *
264  */
265
266 void gfs2_glock_put(struct gfs2_glock *gl)
267 {
268         if (lockref_put_or_lock(&gl->gl_lockref))
269                 return;
270
271         __gfs2_glock_put(gl);
272 }
273
274 /**
275  * may_grant - check if it's ok to grant a new lock
276  * @gl: The glock
277  * @gh: The lock request which we wish to grant
278  *
279  * Returns: true if it's ok to grant the lock
280  */
281
282 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
283 {
284         const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
285         if ((gh->gh_state == LM_ST_EXCLUSIVE ||
286              gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
287                 return 0;
288         if (gl->gl_state == gh->gh_state)
289                 return 1;
290         if (gh->gh_flags & GL_EXACT)
291                 return 0;
292         if (gl->gl_state == LM_ST_EXCLUSIVE) {
293                 if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
294                         return 1;
295                 if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
296                         return 1;
297         }
298         if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
299                 return 1;
300         return 0;
301 }
302
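/*
 * gfs2_holder_wake - wake up a waiting lock holder
 *
 * Clears HIF_WAIT and wakes anyone sleeping on that bit.  For GL_ASYNC
 * requests, also wake sd_async_glock_wait so that gfs2_glock_async_wait()
 * can re-scan its holder array.
 */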
303 static void gfs2_holder_wake(struct gfs2_holder *gh)
304 {
305         clear_bit(HIF_WAIT, &gh->gh_iflags);
306         smp_mb__after_atomic();
307         wake_up_bit(&gh->gh_iflags, HIF_WAIT);
308         if (gh->gh_flags & GL_ASYNC) {
309                 struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;
310
311                 wake_up(&sdp->sd_async_glock_wait);
312         }
313 }
314
315 /**
316  * do_error - Something unexpected has happened during a lock request
317  *
318  */
319
320 static void do_error(struct gfs2_glock *gl, const int ret)
321 {
322         struct gfs2_holder *gh, *tmp;
323
324         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
325                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
326                         continue;
327                 if (ret & LM_OUT_ERROR)
328                         gh->gh_error = -EIO;
329                 else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
330                         gh->gh_error = GLR_TRYFAILED;
331                 else
332                         continue;
333                 list_del_init(&gh->gh_list);
334                 trace_gfs2_glock_queue(gh, 0);
335                 gfs2_holder_wake(gh);
336         }
337 }
338
339 /**
340  * do_promote - promote as many requests as possible on the current queue
341  * @gl: The glock
342  * 
343  * Returns: 1 if there is a blocked holder at the head of the list, or 2
344  *          if a type specific operation is underway.
345  */
346
347 static int do_promote(struct gfs2_glock *gl)
348 __releases(&gl->gl_lockref.lock)
349 __acquires(&gl->gl_lockref.lock)
350 {
351         const struct gfs2_glock_operations *glops = gl->gl_ops;
352         struct gfs2_holder *gh, *tmp;
353         int ret;
354
355 restart:
356         list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
357                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
358                         continue;
359                 if (may_grant(gl, gh)) {
360                         if (gh->gh_list.prev == &gl->gl_holders &&
361                             glops->go_lock) {
362                                 spin_unlock(&gl->gl_lockref.lock);
363                                 /* FIXME: eliminate this eventually */
364                                 ret = glops->go_lock(gh);
365                                 spin_lock(&gl->gl_lockref.lock);
366                                 if (ret) {
367                                         if (ret == 1)
368                                                 return 2;
369                                         gh->gh_error = ret;
370                                         list_del_init(&gh->gh_list);
371                                         trace_gfs2_glock_queue(gh, 0);
372                                         gfs2_holder_wake(gh);
373                                         goto restart;
374                                 }
375                                 set_bit(HIF_HOLDER, &gh->gh_iflags);
376                                 trace_gfs2_promote(gh, 1);
377                                 gfs2_holder_wake(gh);
378                                 goto restart;
379                         }
380                         set_bit(HIF_HOLDER, &gh->gh_iflags);
381                         trace_gfs2_promote(gh, 0);
382                         gfs2_holder_wake(gh);
383                         continue;
384                 }
385                 if (gh->gh_list.prev == &gl->gl_holders)
386                         return 1;
387                 do_error(gl, 0);
388                 break;
389         }
390         return 0;
391 }
392
393 /**
394  * find_first_waiter - find the first gh that's waiting for the glock
395  * @gl: the glock
396  */
397
398 static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
399 {
400         struct gfs2_holder *gh;
401
402         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
403                 if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
404                         return gh;
405         }
406         return NULL;
407 }
408
409 /**
410  * state_change - record that the glock is now in a different state
411  * @gl: the glock
412  * @new_state: the new state
413  *
414  */
415
416 static void state_change(struct gfs2_glock *gl, unsigned int new_state)
417 {
418         int held1, held2;
419
420         held1 = (gl->gl_state != LM_ST_UNLOCKED);
421         held2 = (new_state != LM_ST_UNLOCKED);
422
423         if (held1 != held2) {
424                 GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
425                 if (held2)
426                         gl->gl_lockref.count++;
427                 else
428                         gl->gl_lockref.count--;
429         }
430         if (held1 && held2 && list_empty(&gl->gl_holders))
431                 clear_bit(GLF_QUEUED, &gl->gl_flags);
432
433         if (new_state != gl->gl_target)
434                 /* shorten our minimum hold time */
435                 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
436                                        GL_GLOCK_MIN_HOLD);
437         gl->gl_state = new_state;
438         gl->gl_tchange = jiffies;
439 }
440
441 static void gfs2_demote_wake(struct gfs2_glock *gl)
442 {
443         gl->gl_demote_state = LM_ST_EXCLUSIVE;
444         clear_bit(GLF_DEMOTE, &gl->gl_flags);
445         smp_mb__after_atomic();
446         wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
447 }
448
449 /**
450  * finish_xmote - The DLM has replied to one of our lock requests
451  * @gl: The glock
452  * @ret: The status from the DLM
453  *
454  */
455
456 static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
457 {
458         const struct gfs2_glock_operations *glops = gl->gl_ops;
459         struct gfs2_holder *gh;
460         unsigned state = ret & LM_OUT_ST_MASK;
461         int rv;
462
463         spin_lock(&gl->gl_lockref.lock);
464         trace_gfs2_glock_state_change(gl, state);
465         state_change(gl, state);
466         gh = find_first_waiter(gl);
467
468         /* Demote to UN request arrived during demote to SH or DF */
469         if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
470             state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
471                 gl->gl_target = LM_ST_UNLOCKED;
472
473         /* Check for state != intended state */
474         if (unlikely(state != gl->gl_target)) {
475                 if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
476                         /* move to back of queue and try next entry */
477                         if (ret & LM_OUT_CANCELED) {
478                                 if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
479                                         list_move_tail(&gh->gh_list, &gl->gl_holders);
480                                 gh = find_first_waiter(gl);
481                                 gl->gl_target = gh->gh_state;
482                                 goto retry;
483                         }
484                         /* Some error or failed "try lock" - report it */
485                         if ((ret & LM_OUT_ERROR) ||
486                             (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
487                                 gl->gl_target = gl->gl_state;
488                                 do_error(gl, ret);
489                                 goto out;
490                         }
491                 }
492                 switch(state) {
493                 /* Unlocked due to conversion deadlock, try again */
494                 case LM_ST_UNLOCKED:
495 retry:
496                         do_xmote(gl, gh, gl->gl_target);
497                         break;
498                 /* Conversion fails, unlock and try again */
499                 case LM_ST_SHARED:
500                 case LM_ST_DEFERRED:
501                         do_xmote(gl, gh, LM_ST_UNLOCKED);
502                         break;
503                 default: /* Everything else */
504                         fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
505                                gl->gl_target, state);
506                         GLOCK_BUG_ON(gl, 1);
507                 }
508                 spin_unlock(&gl->gl_lockref.lock);
509                 return;
510         }
511
512         /* Fast path - we got what we asked for */
513         if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
514                 gfs2_demote_wake(gl);
515         if (state != LM_ST_UNLOCKED) {
516                 if (glops->go_xmote_bh) {
517                         spin_unlock(&gl->gl_lockref.lock);
518                         rv = glops->go_xmote_bh(gl, gh);
519                         spin_lock(&gl->gl_lockref.lock);
520                         if (rv) {
521                                 do_error(gl, rv);
522                                 goto out;
523                         }
524                 }
525                 rv = do_promote(gl);
526                 if (rv == 2)
527                         goto out_locked;
528         }
529 out:
530         clear_bit(GLF_LOCK, &gl->gl_flags);
531 out_locked:
532         spin_unlock(&gl->gl_lockref.lock);
533 }
534
535 /**
536  * do_xmote - Calls the DLM to change the state of a lock
537  * @gl: The lock state
538  * @gh: The holder (only for promotes)
539  * @target: The target lock state
540  *
541  */
542
543 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
544 __releases(&gl->gl_lockref.lock)
545 __acquires(&gl->gl_lockref.lock)
546 {
547         const struct gfs2_glock_operations *glops = gl->gl_ops;
548         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
549         unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
550         int ret;
551
552         if (unlikely(gfs2_withdrawn(sdp)) &&
553             target != LM_ST_UNLOCKED)
554                 return;
555         lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
556                       LM_FLAG_PRIORITY);
557         GLOCK_BUG_ON(gl, gl->gl_state == target);
558         GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
559         if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
560             glops->go_inval) {
561                 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
562                 do_error(gl, 0); /* Fail queued try locks */
563         }
564         gl->gl_req = target;
565         set_bit(GLF_BLOCKING, &gl->gl_flags);
566         if ((gl->gl_req == LM_ST_UNLOCKED) ||
567             (gl->gl_state == LM_ST_EXCLUSIVE) ||
568             (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
569                 clear_bit(GLF_BLOCKING, &gl->gl_flags);
570         spin_unlock(&gl->gl_lockref.lock);
571         if (glops->go_sync)
572                 glops->go_sync(gl);
573         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
574                 glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
575         clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
576
577         gfs2_glock_hold(gl);
578         if (sdp->sd_lockstruct.ls_ops->lm_lock) {
579                 /* lock_dlm */
580                 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
581                 if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
582                     target == LM_ST_UNLOCKED &&
583                     test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
584                         finish_xmote(gl, target);
585                         gfs2_glock_queue_work(gl, 0);
586                 }
587                 else if (ret) {
588                         fs_err(sdp, "lm_lock ret %d\n", ret);
589                         GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
590                 }
591         } else { /* lock_nolock */
592                 finish_xmote(gl, target);
593                 gfs2_glock_queue_work(gl, 0);
594         }
595
596         spin_lock(&gl->gl_lockref.lock);
597 }
598
599 /**
600  * find_first_holder - find the first "holder" gh
601  * @gl: the glock
602  */
603
604 static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
605 {
606         struct gfs2_holder *gh;
607
608         if (!list_empty(&gl->gl_holders)) {
609                 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
610                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
611                         return gh;
612         }
613         return NULL;
614 }
615
616 /**
617  * run_queue - do all outstanding tasks related to a glock
618  * @gl: The glock in question
619  * @nonblock: True if we must not block in run_queue
620  *
621  */
622
623 static void run_queue(struct gfs2_glock *gl, const int nonblock)
624 __releases(&gl->gl_lockref.lock)
625 __acquires(&gl->gl_lockref.lock)
626 {
627         struct gfs2_holder *gh = NULL;
628         int ret;
629
630         if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
631                 return;
632
633         GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
634
635         if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
636             gl->gl_demote_state != gl->gl_state) {
637                 if (find_first_holder(gl))
638                         goto out_unlock;
639                 if (nonblock)
640                         goto out_sched;
641                 set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
642                 GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
643                 gl->gl_target = gl->gl_demote_state;
644         } else {
645                 if (test_bit(GLF_DEMOTE, &gl->gl_flags))
646                         gfs2_demote_wake(gl);
647                 ret = do_promote(gl);
648                 if (ret == 0)
649                         goto out_unlock;
650                 if (ret == 2)
651                         goto out;
652                 gh = find_first_waiter(gl);
653                 gl->gl_target = gh->gh_state;
654                 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
655                         do_error(gl, 0); /* Fail queued try locks */
656         }
657         do_xmote(gl, gh, gl->gl_target);
658 out:
659         return;
660
661 out_sched:
662         clear_bit(GLF_LOCK, &gl->gl_flags);
663         smp_mb__after_atomic();
664         gl->gl_lockref.count++;
665         __gfs2_glock_queue_work(gl, 0);
666         return;
667
668 out_unlock:
669         clear_bit(GLF_LOCK, &gl->gl_flags);
670         smp_mb__after_atomic();
671         return;
672 }
673
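/*
 * delete_work_func - try to evict the inode behind an iopen glock
 *
 * Unless the glock is being used to create a new dinode, look the inode up
 * as unlinked and prune its dentry aliases so that it can be evicted.  Drops
 * the glock reference taken when the work was queued.
 */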
674 static void delete_work_func(struct work_struct *work)
675 {
676         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
677         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
678         struct inode *inode;
679         u64 no_addr = gl->gl_name.ln_number;
680
681         /* If someone's using this glock to create a new dinode, the block must
682            have been freed by another node, then re-used, in which case our
683            iopen callback is too late after the fact. Ignore it. */
684         if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
685                 goto out;
686
687         inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
688         if (!IS_ERR_OR_NULL(inode)) {
689                 d_prune_aliases(inode);
690                 iput(inode);
691         }
692 out:
693         gfs2_glock_put(gl);
694 }
695
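/*
 * glock_work_func - process deferred work for a glock
 *
 * Handles a pending reply from the DLM (finish_xmote), turns an expired
 * pending demote into a real demote, runs the queue, and may requeue itself
 * with a delay for inode glocks.  Any glock references not passed on to
 * requeued work are dropped here.
 */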
696 static void glock_work_func(struct work_struct *work)
697 {
698         unsigned long delay = 0;
699         struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
700         unsigned int drop_refs = 1;
701
702         if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
703                 finish_xmote(gl, gl->gl_reply);
704                 drop_refs++;
705         }
706         spin_lock(&gl->gl_lockref.lock);
707         if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
708             gl->gl_state != LM_ST_UNLOCKED &&
709             gl->gl_demote_state != LM_ST_EXCLUSIVE) {
710                 unsigned long holdtime, now = jiffies;
711
712                 holdtime = gl->gl_tchange + gl->gl_hold_time;
713                 if (time_before(now, holdtime))
714                         delay = holdtime - now;
715
716                 if (!delay) {
717                         clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
718                         set_bit(GLF_DEMOTE, &gl->gl_flags);
719                 }
720         }
721         run_queue(gl, 0);
722         if (delay) {
723                 /* Keep one glock reference for the work we requeue. */
724                 drop_refs--;
725                 if (gl->gl_name.ln_type != LM_TYPE_INODE)
726                         delay = 0;
727                 __gfs2_glock_queue_work(gl, delay);
728         }
729
730         /*
731          * Drop the remaining glock references manually here. (Mind that
732          * __gfs2_glock_queue_work depends on the lockref spinlock being held
733          * here as well.)
734          */
735         gl->gl_lockref.count -= drop_refs;
736         if (!gl->gl_lockref.count) {
737                 __gfs2_glock_put(gl);
738                 return;
739         }
740         spin_unlock(&gl->gl_lockref.lock);
741 }
742
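/*
 * find_insert_glock - look up a glock, optionally inserting a new one
 *
 * Looks @name up in the glock hash table, or atomically inserts @new if no
 * matching entry exists.  If a matching glock is found but is already dead,
 * wait on the glock wait table until gfs2_glock_free() has removed it, then
 * retry.  Returns the existing glock with a reference held, NULL if @new was
 * inserted, or an ERR_PTR on insertion failure.
 */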
743 static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
744                                             struct gfs2_glock *new)
745 {
746         struct wait_glock_queue wait;
747         wait_queue_head_t *wq = glock_waitqueue(name);
748         struct gfs2_glock *gl;
749
750         wait.name = name;
751         init_wait(&wait.wait);
752         wait.wait.func = glock_wake_function;
753
754 again:
755         prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
756         rcu_read_lock();
757         if (new) {
758                 gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
759                         &new->gl_node, ht_parms);
760                 if (IS_ERR(gl))
761                         goto out;
762         } else {
763                 gl = rhashtable_lookup_fast(&gl_hash_table,
764                         name, ht_parms);
765         }
766         if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
767                 rcu_read_unlock();
768                 schedule();
769                 goto again;
770         }
771 out:
772         rcu_read_unlock();
773         finish_wait(wq, &wait.wait);
774         return gl;
775 }
776
777 /**
778  * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
779  * @sdp: The GFS2 superblock
780  * @number: the lock number
781  * @glops: The glock_operations to use
782  * @create: If 0, don't create the glock if it doesn't exist
783  * @glp: the glock is returned here
784  *
785  * This does not lock a glock, just finds/creates structures for one.
786  *
787  * Returns: errno
788  */
789
790 int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
791                    const struct gfs2_glock_operations *glops, int create,
792                    struct gfs2_glock **glp)
793 {
794         struct super_block *s = sdp->sd_vfs;
795         struct lm_lockname name = { .ln_number = number,
796                                     .ln_type = glops->go_type,
797                                     .ln_sbd = sdp };
798         struct gfs2_glock *gl, *tmp;
799         struct address_space *mapping;
800         struct kmem_cache *cachep;
801         int ret = 0;
802
803         gl = find_insert_glock(&name, NULL);
804         if (gl) {
805                 *glp = gl;
806                 return 0;
807         }
808         if (!create)
809                 return -ENOENT;
810
811         if (glops->go_flags & GLOF_ASPACE)
812                 cachep = gfs2_glock_aspace_cachep;
813         else
814                 cachep = gfs2_glock_cachep;
815         gl = kmem_cache_alloc(cachep, GFP_NOFS);
816         if (!gl)
817                 return -ENOMEM;
818
819         memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
820
821         if (glops->go_flags & GLOF_LVB) {
822                 gl->gl_lksb.sb_lvbptr = kzalloc(GFS2_MIN_LVB_SIZE, GFP_NOFS);
823                 if (!gl->gl_lksb.sb_lvbptr) {
824                         kmem_cache_free(cachep, gl);
825                         return -ENOMEM;
826                 }
827         }
828
829         atomic_inc(&sdp->sd_glock_disposal);
830         gl->gl_node.next = NULL;
831         gl->gl_flags = 0;
832         gl->gl_name = name;
833         gl->gl_lockref.count = 1;
834         gl->gl_state = LM_ST_UNLOCKED;
835         gl->gl_target = LM_ST_UNLOCKED;
836         gl->gl_demote_state = LM_ST_EXCLUSIVE;
837         gl->gl_ops = glops;
838         gl->gl_dstamp = 0;
839         preempt_disable();
840         /* We use the global stats to estimate the initial per-glock stats */
841         gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
842         preempt_enable();
843         gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
844         gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
845         gl->gl_tchange = jiffies;
846         gl->gl_object = NULL;
847         gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
848         INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
849         INIT_WORK(&gl->gl_delete, delete_work_func);
850
851         mapping = gfs2_glock2aspace(gl);
852         if (mapping) {
853                 mapping->a_ops = &gfs2_meta_aops;
854                 mapping->host = s->s_bdev->bd_inode;
855                 mapping->flags = 0;
856                 mapping_set_gfp_mask(mapping, GFP_NOFS);
857                 mapping->private_data = NULL;
858                 mapping->writeback_index = 0;
859         }
860
861         tmp = find_insert_glock(&name, gl);
862         if (!tmp) {
863                 *glp = gl;
864                 goto out;
865         }
866         if (IS_ERR(tmp)) {
867                 ret = PTR_ERR(tmp);
868                 goto out_free;
869         }
870         *glp = tmp;
871
872 out_free:
873         kfree(gl->gl_lksb.sb_lvbptr);
874         kmem_cache_free(cachep, gl);
875         atomic_dec(&sdp->sd_glock_disposal);
876
877 out:
878         return ret;
879 }
880
881 /**
882  * gfs2_holder_init - initialize a struct gfs2_holder in the default way
883  * @gl: the glock
884  * @state: the state we're requesting
885  * @flags: the modifier flags
886  * @gh: the holder structure
887  *
888  */
889
890 void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
891                       struct gfs2_holder *gh)
892 {
893         INIT_LIST_HEAD(&gh->gh_list);
894         gh->gh_gl = gl;
895         gh->gh_ip = _RET_IP_;
896         gh->gh_owner_pid = get_pid(task_pid(current));
897         gh->gh_state = state;
898         gh->gh_flags = flags;
899         gh->gh_error = 0;
900         gh->gh_iflags = 0;
901         gfs2_glock_hold(gl);
902 }
903
904 /**
905  * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
906  * @state: the state we're requesting
907  * @flags: the modifier flags
908  * @gh: the holder structure
909  *
910  * Don't mess with the glock.
911  *
912  */
913
914 void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
915 {
916         gh->gh_state = state;
917         gh->gh_flags = flags;
918         gh->gh_iflags = 0;
919         gh->gh_ip = _RET_IP_;
920         put_pid(gh->gh_owner_pid);
921         gh->gh_owner_pid = get_pid(task_pid(current));
922 }
923
924 /**
925  * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
926  * @gh: the holder structure
927  *
928  */
929
930 void gfs2_holder_uninit(struct gfs2_holder *gh)
931 {
932         put_pid(gh->gh_owner_pid);
933         gfs2_glock_put(gh->gh_gl);
934         gfs2_holder_mark_uninitialized(gh);
935         gh->gh_ip = 0;
936 }
937
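/*
 * gfs2_glock_update_hold_time - adapt the minimum hold time of a glock
 *
 * If acquiring the glock took more than a second, lengthen its minimum hold
 * time (up to GL_GLOCK_MAX_HOLD) so that it stays cached a little longer.
 */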
938 static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
939                                         unsigned long start_time)
940 {
941         /* Have we waited longer than a second? */
942         if (time_after(jiffies, start_time + HZ)) {
943                 /* Lengthen the minimum hold time. */
944                 gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
945                                        GL_GLOCK_MAX_HOLD);
946         }
947 }
948
949 /**
950  * gfs2_glock_wait - wait on a glock acquisition
951  * @gh: the glock holder
952  *
953  * Returns: 0 on success
954  */
955
956 int gfs2_glock_wait(struct gfs2_holder *gh)
957 {
958         unsigned long start_time = jiffies;
959
960         might_sleep();
961         wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
962         gfs2_glock_update_hold_time(gh->gh_gl, start_time);
963         return gh->gh_error;
964 }
965
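/*
 * glocks_pending - check if any holder still awaits a reply from the DLM
 *
 * Returns 1 if at least one holder in the array still has HIF_WAIT set.
 */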
966 static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
967 {
968         int i;
969
970         for (i = 0; i < num_gh; i++)
971                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
972                         return 1;
973         return 0;
974 }
975
976 /**
977  * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
978  * @num_gh: the number of holders in the array
979  * @ghs: the glock holder array
980  *
981  * Returns: 0 on success, meaning all glocks have been granted and are held.
982  *          -ESTALE if the request timed out, meaning all glocks were released,
983  *          and the caller should retry the operation.
984  */
985
986 int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
987 {
988         struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
989         int i, ret = 0, timeout = 0;
990         unsigned long start_time = jiffies;
991         bool keep_waiting;
992
993         might_sleep();
994         /*
995          * Total up the (minimum hold time * 2) of all glocks and use that to
996          * determine the max amount of time we should wait.
997          */
998         for (i = 0; i < num_gh; i++)
999                 timeout += ghs[i].gh_gl->gl_hold_time << 1;
1000
1001 wait_for_dlm:
1002         if (!wait_event_timeout(sdp->sd_async_glock_wait,
1003                                 !glocks_pending(num_gh, ghs), timeout))
1004                 ret = -ESTALE; /* request timed out. */
1005
1006         /*
1007          * If dlm granted all our requests, we need to adjust the glock
1008          * minimum hold time values according to how long we waited.
1009          *
1010          * If our request timed out, we need to repeatedly release any held
1011          * glocks we acquired thus far to allow dlm to acquire the remaining
1012          * glocks without deadlocking.  We cannot currently cancel outstanding
1013          * glock acquisitions.
1014          *
1015          * The HIF_WAIT bit tells us which requests still need a response from
1016          * dlm.
1017          *
1018          * If dlm sent us any errors, we return the first error we find.
1019          */
1020         keep_waiting = false;
1021         for (i = 0; i < num_gh; i++) {
1022                 /* Skip holders we have already dequeued below. */
1023                 if (!gfs2_holder_queued(&ghs[i]))
1024                         continue;
1025                 /* Skip holders with a pending DLM response. */
1026                 if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
1027                         keep_waiting = true;
1028                         continue;
1029                 }
1030
1031                 if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
1032                         if (ret == -ESTALE)
1033                                 gfs2_glock_dq(&ghs[i]);
1034                         else
1035                                 gfs2_glock_update_hold_time(ghs[i].gh_gl,
1036                                                             start_time);
1037                 }
1038                 if (!ret)
1039                         ret = ghs[i].gh_error;
1040         }
1041
1042         if (keep_waiting)
1043                 goto wait_for_dlm;
1044
1045         /*
1046          * At this point, we've either acquired all locks or released them all.
1047          */
1048         return ret;
1049 }
1050
1051 /**
1052  * handle_callback - process a demote request
1053  * @gl: the glock
1054  * @state: the state the caller wants us to change to
1055  *
1056  * There are only two requests that we are going to see in actual
1057  * practice: LM_ST_SHARED and LM_ST_UNLOCKED
1058  */
1059
1060 static void handle_callback(struct gfs2_glock *gl, unsigned int state,
1061                             unsigned long delay, bool remote)
1062 {
1063         int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE;
1064
1065         set_bit(bit, &gl->gl_flags);
1066         if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
1067                 gl->gl_demote_state = state;
1068                 gl->gl_demote_time = jiffies;
1069         } else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
1070                         gl->gl_demote_state != state) {
1071                 gl->gl_demote_state = LM_ST_UNLOCKED;
1072         }
1073         if (gl->gl_ops->go_callback)
1074                 gl->gl_ops->go_callback(gl, remote);
1075         trace_gfs2_demote_rq(gl, remote);
1076 }
1077
1078 void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
1079 {
1080         struct va_format vaf;
1081         va_list args;
1082
1083         va_start(args, fmt);
1084
1085         if (seq) {
1086                 seq_vprintf(seq, fmt, args);
1087         } else {
1088                 vaf.fmt = fmt;
1089                 vaf.va = &args;
1090
1091                 pr_err("%pV", &vaf);
1092         }
1093
1094         va_end(args);
1095 }
1096
1097 /**
1098  * add_to_queue - Add a holder to the wait queue (but look for recursion)
1099  * @gh: the holder structure to add
1100  *
1101  * Eventually we should move the recursive locking trap to a
1102  * debugging option or something like that. This is the fast
1103  * path and needs to have the minimum number of distractions.
1104  * 
1105  */
1106
1107 static inline void add_to_queue(struct gfs2_holder *gh)
1108 __releases(&gl->gl_lockref.lock)
1109 __acquires(&gl->gl_lockref.lock)
1110 {
1111         struct gfs2_glock *gl = gh->gh_gl;
1112         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1113         struct list_head *insert_pt = NULL;
1114         struct gfs2_holder *gh2;
1115         int try_futile = 0;
1116
1117         GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
1118         if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
1119                 GLOCK_BUG_ON(gl, true);
1120
1121         if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
1122                 if (test_bit(GLF_LOCK, &gl->gl_flags))
1123                         try_futile = !may_grant(gl, gh);
1124                 if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
1125                         goto fail;
1126         }
1127
1128         list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
1129                 if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
1130                     (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
1131                         goto trap_recursive;
1132                 if (try_futile &&
1133                     !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
1134 fail:
1135                         gh->gh_error = GLR_TRYFAILED;
1136                         gfs2_holder_wake(gh);
1137                         return;
1138                 }
1139                 if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
1140                         continue;
1141                 if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
1142                         insert_pt = &gh2->gh_list;
1143         }
1144         set_bit(GLF_QUEUED, &gl->gl_flags);
1145         trace_gfs2_glock_queue(gh, 1);
1146         gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
1147         gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
1148         if (likely(insert_pt == NULL)) {
1149                 list_add_tail(&gh->gh_list, &gl->gl_holders);
1150                 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1151                         goto do_cancel;
1152                 return;
1153         }
1154         list_add_tail(&gh->gh_list, insert_pt);
1155 do_cancel:
1156         gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
1157         if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
1158                 spin_unlock(&gl->gl_lockref.lock);
1159                 if (sdp->sd_lockstruct.ls_ops->lm_cancel)
1160                         sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
1161                 spin_lock(&gl->gl_lockref.lock);
1162         }
1163         return;
1164
1165 trap_recursive:
1166         fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
1167         fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
1168         fs_err(sdp, "lock type: %d req lock state : %d\n",
1169                gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
1170         fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
1171         fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
1172         fs_err(sdp, "lock type: %d req lock state : %d\n",
1173                gh->gh_gl->gl_name.ln_type, gh->gh_state);
1174         gfs2_dump_glock(NULL, gl, true);
1175         BUG();
1176 }
1177
1178 /**
1179  * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
1180  * @gh: the holder structure
1181  *
1182  * if (gh->gh_flags & GL_ASYNC), this never returns an error
1183  *
1184  * Returns: 0, GLR_TRYFAILED, or errno on failure
1185  */
1186
1187 int gfs2_glock_nq(struct gfs2_holder *gh)
1188 {
1189         struct gfs2_glock *gl = gh->gh_gl;
1190         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1191         int error = 0;
1192
1193         if (unlikely(gfs2_withdrawn(sdp)))
1194                 return -EIO;
1195
1196         if (test_bit(GLF_LRU, &gl->gl_flags))
1197                 gfs2_glock_remove_from_lru(gl);
1198
1199         spin_lock(&gl->gl_lockref.lock);
1200         add_to_queue(gh);
1201         if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
1202                      test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
1203                 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1204                 gl->gl_lockref.count++;
1205                 __gfs2_glock_queue_work(gl, 0);
1206         }
1207         run_queue(gl, 1);
1208         spin_unlock(&gl->gl_lockref.lock);
1209
1210         if (!(gh->gh_flags & GL_ASYNC))
1211                 error = gfs2_glock_wait(gh);
1212
1213         return error;
1214 }
1215
1216 /**
1217  * gfs2_glock_poll - poll to see if an async request has been completed
1218  * @gh: the holder
1219  *
1220  * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
1221  */
1222
1223 int gfs2_glock_poll(struct gfs2_holder *gh)
1224 {
1225         return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
1226 }
1227
1228 /**
1229  * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1230  * @gh: the glock holder
1231  *
1232  */
1233
1234 void gfs2_glock_dq(struct gfs2_holder *gh)
1235 {
1236         struct gfs2_glock *gl = gh->gh_gl;
1237         const struct gfs2_glock_operations *glops = gl->gl_ops;
1238         unsigned delay = 0;
1239         int fast_path = 0;
1240
1241         spin_lock(&gl->gl_lockref.lock);
1242         if (gh->gh_flags & GL_NOCACHE)
1243                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1244
1245         list_del_init(&gh->gh_list);
1246         clear_bit(HIF_HOLDER, &gh->gh_iflags);
1247         if (find_first_holder(gl) == NULL) {
1248                 if (glops->go_unlock) {
1249                         GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
1250                         spin_unlock(&gl->gl_lockref.lock);
1251                         glops->go_unlock(gh);
1252                         spin_lock(&gl->gl_lockref.lock);
1253                         clear_bit(GLF_LOCK, &gl->gl_flags);
1254                 }
1255                 if (list_empty(&gl->gl_holders) &&
1256                     !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1257                     !test_bit(GLF_DEMOTE, &gl->gl_flags))
1258                         fast_path = 1;
1259         }
1260         if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
1261                 gfs2_glock_add_to_lru(gl);
1262
1263         trace_gfs2_glock_queue(gh, 0);
1264         if (unlikely(!fast_path)) {
1265                 gl->gl_lockref.count++;
1266                 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1267                     !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
1268                     gl->gl_name.ln_type == LM_TYPE_INODE)
1269                         delay = gl->gl_hold_time;
1270                 __gfs2_glock_queue_work(gl, delay);
1271         }
1272         spin_unlock(&gl->gl_lockref.lock);
1273 }
1274
1275 void gfs2_glock_dq_wait(struct gfs2_holder *gh)
1276 {
1277         struct gfs2_glock *gl = gh->gh_gl;
1278         gfs2_glock_dq(gh);
1279         might_sleep();
1280         wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
1281 }
1282
1283 /**
1284  * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
1285  * @gh: the holder structure
1286  *
1287  */
1288
1289 void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
1290 {
1291         gfs2_glock_dq(gh);
1292         gfs2_holder_uninit(gh);
1293 }
1294
1295 /**
1296  * gfs2_glock_nq_num - acquire a glock based on lock number
1297  * @sdp: the filesystem
1298  * @number: the lock number
1299  * @glops: the glock operations for the type of glock
1300  * @state: the state to acquire the glock in
1301  * @flags: modifier flags for the acquisition
1302  * @gh: the struct gfs2_holder
1303  *
1304  * Returns: errno
1305  */
1306
1307 int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
1308                       const struct gfs2_glock_operations *glops,
1309                       unsigned int state, u16 flags, struct gfs2_holder *gh)
1310 {
1311         struct gfs2_glock *gl;
1312         int error;
1313
1314         error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
1315         if (!error) {
1316                 error = gfs2_glock_nq_init(gl, state, flags, gh);
1317                 gfs2_glock_put(gl);
1318         }
1319
1320         return error;
1321 }
1322
1323 /**
1324  * glock_compare - Compare two struct gfs2_glock structures for sorting
1325  * @arg_a: the first structure
1326  * @arg_b: the second structure
1327  *
1328  */
1329
1330 static int glock_compare(const void *arg_a, const void *arg_b)
1331 {
1332         const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
1333         const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
1334         const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1335         const struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1336
1337         if (a->ln_number > b->ln_number)
1338                 return 1;
1339         if (a->ln_number < b->ln_number)
1340                 return -1;
1341         BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
1342         return 0;
1343 }
1344
1345 /**
1346  * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
1347  * @num_gh: the number of structures
1348  * @ghs: an array of struct gfs2_holder structures
1349  *
1350  * Returns: 0 on success (all glocks acquired),
1351  *          errno on failure (no glocks acquired)
1352  */
1353
1354 static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
1355                      struct gfs2_holder **p)
1356 {
1357         unsigned int x;
1358         int error = 0;
1359
1360         for (x = 0; x < num_gh; x++)
1361                 p[x] = &ghs[x];
1362
1363         sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);
1364
1365         for (x = 0; x < num_gh; x++) {
1366                 p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1367
1368                 error = gfs2_glock_nq(p[x]);
1369                 if (error) {
1370                         while (x--)
1371                                 gfs2_glock_dq(p[x]);
1372                         break;
1373                 }
1374         }
1375
1376         return error;
1377 }
1378
1379 /**
1380  * gfs2_glock_nq_m - acquire multiple glocks
1381  * @num_gh: the number of structures
1382  * @ghs: an array of struct gfs2_holder structures
1383  *
1384  *
1385  * Returns: 0 on success (all glocks acquired),
1386  *          errno on failure (no glocks acquired)
1387  */
1388
1389 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1390 {
1391         struct gfs2_holder *tmp[4];
1392         struct gfs2_holder **pph = tmp;
1393         int error = 0;
1394
1395         switch(num_gh) {
1396         case 0:
1397                 return 0;
1398         case 1:
1399                 ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1400                 return gfs2_glock_nq(ghs);
1401         default:
1402                 if (num_gh <= 4)
1403                         break;
1404                 pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
1405                                     GFP_NOFS);
1406                 if (!pph)
1407                         return -ENOMEM;
1408         }
1409
1410         error = nq_m_sync(num_gh, ghs, pph);
1411
1412         if (pph != tmp)
1413                 kfree(pph);
1414
1415         return error;
1416 }
1417
1418 /**
1419  * gfs2_glock_dq_m - release multiple glocks
1420  * @num_gh: the number of structures
1421  * @ghs: an array of struct gfs2_holder structures
1422  *
1423  */
1424
1425 void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1426 {
1427         while (num_gh--)
1428                 gfs2_glock_dq(&ghs[num_gh]);
1429 }
1430
1431 void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1432 {
1433         unsigned long delay = 0;
1434         unsigned long holdtime;
1435         unsigned long now = jiffies;
1436
1437         gfs2_glock_hold(gl);
1438         holdtime = gl->gl_tchange + gl->gl_hold_time;
1439         if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
1440             gl->gl_name.ln_type == LM_TYPE_INODE) {
1441                 if (time_before(now, holdtime))
1442                         delay = holdtime - now;
1443                 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1444                         delay = gl->gl_hold_time;
1445         }
1446
1447         spin_lock(&gl->gl_lockref.lock);
1448         handle_callback(gl, state, delay, true);
1449         __gfs2_glock_queue_work(gl, delay);
1450         spin_unlock(&gl->gl_lockref.lock);
1451 }
1452
1453 /**
1454  * gfs2_should_freeze - Figure out if glock should be frozen
1455  * @gl: The glock in question
1456  *
1457  * Glocks are not frozen if (a) the result of the dlm operation is
1458  * an error, (b) the locking operation was an unlock operation or
1459  * (c) there is a "noexp" flagged request anywhere in the queue
1460  *
1461  * Returns: 1 if freezing should occur, 0 otherwise
1462  */
1463
1464 static int gfs2_should_freeze(const struct gfs2_glock *gl)
1465 {
1466         const struct gfs2_holder *gh;
1467
1468         if (gl->gl_reply & ~LM_OUT_ST_MASK)
1469                 return 0;
1470         if (gl->gl_target == LM_ST_UNLOCKED)
1471                 return 0;
1472
1473         list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1474                 if (test_bit(HIF_HOLDER, &gh->gh_iflags))
1475                         continue;
1476                 if (LM_FLAG_NOEXP & gh->gh_flags)
1477                         return 0;
1478         }
1479
1480         return 1;
1481 }
1482
1483 /**
1484  * gfs2_glock_complete - Callback used by locking
1485  * @gl: Pointer to the glock
1486  * @ret: The return value from the dlm
1487  *
1488  * The gl_reply field is protected by gl_lockref.lock so that it is ok
1489  * to use a bitfield shared with other glock state fields.
1490  */
1491
1492 void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1493 {
1494         struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
1495
1496         spin_lock(&gl->gl_lockref.lock);
1497         gl->gl_reply = ret;
1498
1499         if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
1500                 if (gfs2_should_freeze(gl)) {
1501                         set_bit(GLF_FROZEN, &gl->gl_flags);
1502                         spin_unlock(&gl->gl_lockref.lock);
1503                         return;
1504                 }
1505         }
1506
1507         gl->gl_lockref.count++;
1508         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1509         __gfs2_glock_queue_work(gl, 0);
1510         spin_unlock(&gl->gl_lockref.lock);
1511 }
1512
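/*
 * glock_cmp - list_sort() helper ordering glocks by lock number
 *
 * Used by gfs2_dispose_glock_lru() so that any resulting disk accesses are
 * issued in roughly ascending block order.
 */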
1513 static int glock_cmp(void *priv, struct list_head *a, struct list_head *b)
1514 {
1515         struct gfs2_glock *gla, *glb;
1516
1517         gla = list_entry(a, struct gfs2_glock, gl_lru);
1518         glb = list_entry(b, struct gfs2_glock, gl_lru);
1519
1520         if (gla->gl_name.ln_number > glb->gl_name.ln_number)
1521                 return 1;
1522         if (gla->gl_name.ln_number < glb->gl_name.ln_number)
1523                 return -1;
1524
1525         return 0;
1526 }
1527
1528 /**
1529  * gfs2_dispose_glock_lru - Demote a list of glocks
1530  * @list: The list to dispose of
1531  *
1532  * Disposing of glocks may involve disk accesses, so that here we sort
1533  * the glocks by number (i.e. disk location of the inodes) so that if
1534  * there are any such accesses, they'll be sent in order (mostly).
1535  *
1536  * Must be called under the lru_lock, but may drop and retake this
1537  * lock. While the lru_lock is dropped, entries may vanish from the
1538  * list, but no new entries will appear on the list (since it is
1539  * private)
1540  */
1541
1542 static void gfs2_dispose_glock_lru(struct list_head *list)
1543 __releases(&lru_lock)
1544 __acquires(&lru_lock)
1545 {
1546         struct gfs2_glock *gl;
1547
1548         list_sort(NULL, list, glock_cmp);
1549
1550         while(!list_empty(list)) {
1551                 gl = list_entry(list->next, struct gfs2_glock, gl_lru);
1552                 list_del_init(&gl->gl_lru);
1553                 if (!spin_trylock(&gl->gl_lockref.lock)) {
1554 add_back_to_lru:
1555                         list_add(&gl->gl_lru, &lru_list);
1556                         set_bit(GLF_LRU, &gl->gl_flags);
1557                         atomic_inc(&lru_count);
1558                         continue;
1559                 }
1560                 if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
1561                         spin_unlock(&gl->gl_lockref.lock);
1562                         goto add_back_to_lru;
1563                 }
1564                 gl->gl_lockref.count++;
1565                 if (demote_ok(gl))
1566                         handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1567                 WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
1568                 __gfs2_glock_queue_work(gl, 0);
1569                 spin_unlock(&gl->gl_lockref.lock);
1570                 cond_resched_lock(&lru_lock);
1571         }
1572 }
1573
1574 /**
1575  * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
1576  * @nr: The number of entries to scan
1577  *
1578  * This function selects the entries on the LRU which are able to
1579  * be demoted, and then kicks off the process by calling
1580  * gfs2_dispose_glock_lru() above.
1581  */
1582
1583 static long gfs2_scan_glock_lru(int nr)
1584 {
1585         struct gfs2_glock *gl;
1586         LIST_HEAD(skipped);
1587         LIST_HEAD(dispose);
1588         long freed = 0;
1589
1590         spin_lock(&lru_lock);
1591         while ((nr-- >= 0) && !list_empty(&lru_list)) {
1592                 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1593
1594                 /* Test for being demotable */
1595                 if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
1596                         list_move(&gl->gl_lru, &dispose);
1597                         atomic_dec(&lru_count);
1598                         clear_bit(GLF_LRU, &gl->gl_flags);
1599                         freed++;
1600                         continue;
1601                 }
1602
1603                 list_move(&gl->gl_lru, &skipped);
1604         }
1605         list_splice(&skipped, &lru_list);
1606         if (!list_empty(&dispose))
1607                 gfs2_dispose_glock_lru(&dispose);
1608         spin_unlock(&lru_lock);
1609
1610         return freed;
1611 }
1612
1613 static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
1614                                             struct shrink_control *sc)
1615 {
1616         if (!(sc->gfp_mask & __GFP_FS))
1617                 return SHRINK_STOP;
1618         return gfs2_scan_glock_lru(sc->nr_to_scan);
1619 }
1620
1621 static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
1622                                              struct shrink_control *sc)
1623 {
1624         return vfs_pressure_ratio(atomic_read(&lru_count));
1625 }
1626
1627 static struct shrinker glock_shrinker = {
1628         .seeks = DEFAULT_SEEKS,
1629         .count_objects = gfs2_glock_shrink_count,
1630         .scan_objects = gfs2_glock_shrink_scan,
1631 };
1632
1633 /**
1634  * glock_hash_walk - Call a function for each glock in the hash table
1635  * @examiner: the function
1636  * @sdp: the filesystem
1637  *
1638  * Note that the examiner function can be called multiple times on the
1639  * same glock, so the caller must ensure that the function can cope
1640  * with that.
1641  *
1642  */
1643
1644 static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1645 {
1646         struct gfs2_glock *gl;
1647         struct rhashtable_iter iter;
1648
1649         rhashtable_walk_enter(&gl_hash_table, &iter);
1650
1651         do {
1652                 rhashtable_walk_start(&iter);
1653
1654                 while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
1655                         if (gl->gl_name.ln_sbd == sdp &&
1656                             lockref_get_not_dead(&gl->gl_lockref))
1657                                 examiner(gl);
1658
1659                 rhashtable_walk_stop(&iter);
1660         } while (cond_resched(), gl == ERR_PTR(-EAGAIN));
1661
1662         rhashtable_walk_exit(&iter);
1663 }
1664
1665 /**
1666  * thaw_glock - thaw out a glock which has an unprocessed reply waiting
1667  * @gl: The glock to thaw
1668  *
1669  */
1670
1671 static void thaw_glock(struct gfs2_glock *gl)
1672 {
1673         if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
1674                 gfs2_glock_put(gl);
1675                 return;
1676         }
1677         set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1678         gfs2_glock_queue_work(gl, 0);
1679 }
1680
1681 /**
1682  * clear_glock - look at a glock and see if we can free it from glock cache
1683  * @gl: the glock to look at
1684  *
1685  */
1686
1687 static void clear_glock(struct gfs2_glock *gl)
1688 {
1689         gfs2_glock_remove_from_lru(gl);
1690
1691         spin_lock(&gl->gl_lockref.lock);
1692         if (gl->gl_state != LM_ST_UNLOCKED)
1693                 handle_callback(gl, LM_ST_UNLOCKED, 0, false);
1694         __gfs2_glock_queue_work(gl, 0);
1695         spin_unlock(&gl->gl_lockref.lock);
1696 }
1697
1698 /**
1699  * gfs2_glock_thaw - Thaw any frozen glocks
1700  * @sdp: The super block
1701  *
1702  */
1703
1704 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1705 {
1706         glock_hash_walk(thaw_glock, sdp);
1707 }
1708
1709 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1710 {
1711         spin_lock(&gl->gl_lockref.lock);
1712         gfs2_dump_glock(seq, gl, fsid);
1713         spin_unlock(&gl->gl_lockref.lock);
1714 }
1715
1716 static void dump_glock_func(struct gfs2_glock *gl)
1717 {
1718         dump_glock(NULL, gl, true);
1719 }
1720
1721 /**
1722  * gfs2_gl_hash_clear - Empty out the glock hash table
1723  * @sdp: the filesystem
1725  *
1726  * Called when unmounting the filesystem.
1727  */
1728
1729 void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1730 {
1731         set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
1732         flush_workqueue(glock_workqueue);
1733         glock_hash_walk(clear_glock, sdp);
1734         flush_workqueue(glock_workqueue);
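        /* Wait (for up to ten minutes) for glock disposal to complete. */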
1735         wait_event_timeout(sdp->sd_glock_wait,
1736                            atomic_read(&sdp->sd_glock_disposal) == 0,
1737                            HZ * 600);
1738         glock_hash_walk(dump_glock_func, sdp);
1739 }
1740
1741 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
1742 {
1743         struct gfs2_glock *gl = ip->i_gl;
1744         int ret;
1745
1746         ret = gfs2_truncatei_resume(ip);
1747         gfs2_assert_withdraw(gl->gl_name.ln_sbd, ret == 0);
1748
1749         spin_lock(&gl->gl_lockref.lock);
1750         clear_bit(GLF_LOCK, &gl->gl_flags);
1751         run_queue(gl, 1);
1752         spin_unlock(&gl->gl_lockref.lock);
1753 }
1754
1755 static const char *state2str(unsigned state)
1756 {
1757         switch (state) {
1758         case LM_ST_UNLOCKED:
1759                 return "UN";
1760         case LM_ST_SHARED:
1761                 return "SH";
1762         case LM_ST_DEFERRED:
1763                 return "DF";
1764         case LM_ST_EXCLUSIVE:
1765                 return "EX";
1766         }
1767         return "??";
1768 }
1769
1770 static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
1771 {
1772         char *p = buf;
1773         if (flags & LM_FLAG_TRY)
1774                 *p++ = 't';
1775         if (flags & LM_FLAG_TRY_1CB)
1776                 *p++ = 'T';
1777         if (flags & LM_FLAG_NOEXP)
1778                 *p++ = 'e';
1779         if (flags & LM_FLAG_ANY)
1780                 *p++ = 'A';
1781         if (flags & LM_FLAG_PRIORITY)
1782                 *p++ = 'p';
1783         if (flags & GL_ASYNC)
1784                 *p++ = 'a';
1785         if (flags & GL_EXACT)
1786                 *p++ = 'E';
1787         if (flags & GL_NOCACHE)
1788                 *p++ = 'c';
1789         if (test_bit(HIF_HOLDER, &iflags))
1790                 *p++ = 'H';
1791         if (test_bit(HIF_WAIT, &iflags))
1792                 *p++ = 'W';
1793         if (test_bit(HIF_FIRST, &iflags))
1794                 *p++ = 'F';
1795         *p = 0;
1796         return buf;
1797 }
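
/*
 * For example, a holder flags string of "AEH" in the dump below would mean
 * that the request had LM_FLAG_ANY and GL_EXACT set and that the holder
 * currently owns the lock (HIF_HOLDER).
 */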
1798
1799 /**
1800  * dump_holder - print information about a glock holder
1801  * @seq: the seq_file struct
1802  * @gh: the glock holder
1803  * @fs_id_buf: pointer to file system id (if requested)
1804  *
1805  */
1806
1807 static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
1808                         const char *fs_id_buf)
1809 {
1810         struct task_struct *gh_owner = NULL;
1811         char flags_buf[32];
1812
1813         rcu_read_lock();
1814         if (gh->gh_owner_pid)
1815                 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1816         gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1817                        fs_id_buf, state2str(gh->gh_state),
1818                        hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1819                        gh->gh_error,
1820                        gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1821                        gh_owner ? gh_owner->comm : "(ended)",
1822                        (void *)gh->gh_ip);
1823         rcu_read_unlock();
1824 }
1825
1826 static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1827 {
1828         const unsigned long *gflags = &gl->gl_flags;
1829         char *p = buf;
1830
1831         if (test_bit(GLF_LOCK, gflags))
1832                 *p++ = 'l';
1833         if (test_bit(GLF_DEMOTE, gflags))
1834                 *p++ = 'D';
1835         if (test_bit(GLF_PENDING_DEMOTE, gflags))
1836                 *p++ = 'd';
1837         if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
1838                 *p++ = 'p';
1839         if (test_bit(GLF_DIRTY, gflags))
1840                 *p++ = 'y';
1841         if (test_bit(GLF_LFLUSH, gflags))
1842                 *p++ = 'f';
1843         if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
1844                 *p++ = 'i';
1845         if (test_bit(GLF_REPLY_PENDING, gflags))
1846                 *p++ = 'r';
1847         if (test_bit(GLF_INITIAL, gflags))
1848                 *p++ = 'I';
1849         if (test_bit(GLF_FROZEN, gflags))
1850                 *p++ = 'F';
1851         if (test_bit(GLF_QUEUED, gflags))
1852                 *p++ = 'q';
1853         if (test_bit(GLF_LRU, gflags))
1854                 *p++ = 'L';
1855         if (gl->gl_object)
1856                 *p++ = 'o';
1857         if (test_bit(GLF_BLOCKING, gflags))
1858                 *p++ = 'b';
1859         *p = 0;
1860         return buf;
1861 }
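
/*
 * For example, a glock flags string of "Iqo" means GLF_INITIAL and
 * GLF_QUEUED are set and the glock has an object attached (gl_object).
 */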
1862
1863 /**
1864  * gfs2_dump_glock - print information about a glock
1865  * @seq: The seq_file struct
1866  * @gl: the glock
1867  * @fsid: If true, also dump the file system id
1868  *
1869  * The file format is as follows:
1870  * One line per object; capital letters are used to indicate objects:
1871  * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
1872  * other objects are indented by a single space and follow the glock to
1873  * which they are related. Fields are indicated by lower case letters
1874  * followed by a colon and the field value, except for strings, which are
1875  * in [] so that it's possible to see whether they are composed of spaces,
1876  * for example. The fields are n = number (id of the object), f = flags,
1877  * t = type, s = state, r = refcount, e = error, p = pid. See the
1878  * illustrative (made-up) sample after the function below.
1879  */
1880
1881 void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
1882 {
1883         const struct gfs2_glock_operations *glops = gl->gl_ops;
1884         unsigned long long dtime;
1885         const struct gfs2_holder *gh;
1886         char gflags_buf[32];
1887         struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
1888         char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
1889
1890         memset(fs_id_buf, 0, sizeof(fs_id_buf));
1891         if (fsid && sdp) /* safety precaution */
1892                 sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
1893         dtime = jiffies - gl->gl_demote_time;
1894         dtime *= 1000000/HZ; /* demote time in uSec */
1895         if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1896                 dtime = 0;
1897         gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
1898                        "v:%d r:%d m:%ld\n", fs_id_buf, state2str(gl->gl_state),
1899                   gl->gl_name.ln_type,
1900                   (unsigned long long)gl->gl_name.ln_number,
1901                   gflags2str(gflags_buf, gl),
1902                   state2str(gl->gl_target),
1903                   state2str(gl->gl_demote_state), dtime,
1904                   atomic_read(&gl->gl_ail_count),
1905                   atomic_read(&gl->gl_revokes),
1906                   (int)gl->gl_lockref.count, gl->gl_hold_time);
1907
1908         list_for_each_entry(gh, &gl->gl_holders, gh_list)
1909                 dump_holder(seq, gh, fs_id_buf);
1910
1911         if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
1912                 glops->go_dump(seq, gl, fs_id_buf);
1913 }
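
/*
 * An illustrative, entirely made-up sample of the glock dump output for an
 * inode glock (type 2) with a single holder:
 *
 *   G:  s:SH n:2/4a23 f:Iqo t:SH d:EX/0 a:0 v:0 r:3 m:10
 *    H: s:SH f:EH e:0 p:1234 [cat] gfs2_open+0x.../0x...
 *
 * On the G: line, s/t/d are the current, target and demote states, n is
 * <type>/<number>, f is the gflags2str() string, a the AIL count, v the
 * revoke count, r the reference count and m the hold time.  On the H:
 * line, f is the hflags2str() string, e the error, p the pid of the
 * holder and the final field the caller recorded in gh_ip.
 */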
1914
1915 static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
1916 {
1917         struct gfs2_glock *gl = iter_ptr;
1918
1919         seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
1920                    gl->gl_name.ln_type,
1921                    (unsigned long long)gl->gl_name.ln_number,
1922                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
1923                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
1924                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
1925                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
1926                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
1927                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
1928                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
1929                    (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
1930         return 0;
1931 }
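
/*
 * A made-up example of one glstats line, as emitted above:
 *
 *   G: n:2/4a23 rtt:420/100 rttb:900/250 irt:5000/1200 dcnt: 37 qcnt: 42
 *
 * rtt, rttb and irt are printed as smoothed-value/variance pairs taken
 * from the GFS2_LKS_SRTT*, GFS2_LKS_SRTTB* and GFS2_LKS_SIRT* counters;
 * dcnt and qcnt are GFS2_LKS_DCOUNT and GFS2_LKS_QCOUNT.
 */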
1932
1933 static const char *gfs2_gltype[] = {
1934         "type",
1935         "reserved",
1936         "nondisk",
1937         "inode",
1938         "rgrp",
1939         "meta",
1940         "iopen",
1941         "flock",
1942         "plock",
1943         "quota",
1944         "journal",
1945 };
1946
1947 static const char *gfs2_stype[] = {
1948         [GFS2_LKS_SRTT]         = "srtt",
1949         [GFS2_LKS_SRTTVAR]      = "srttvar",
1950         [GFS2_LKS_SRTTB]        = "srttb",
1951         [GFS2_LKS_SRTTVARB]     = "srttvarb",
1952         [GFS2_LKS_SIRT]         = "sirt",
1953         [GFS2_LKS_SIRTVAR]      = "sirtvar",
1954         [GFS2_LKS_DCOUNT]       = "dlm",
1955         [GFS2_LKS_QCOUNT]       = "queue",
1956 };
1957
1958 #define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))
1959
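/*
 * Each iterator position encodes a (glock type, statistic) pair: pos >> 3
 * indexes gfs2_gltype[] and pos & 7 indexes gfs2_stype[].  Position 0
 * prints a header row of CPU numbers; positions 1-7 print nothing, so each
 * glock type starts at a multiple of eight.
 */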
1960 static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
1961 {
1962         struct gfs2_sbd *sdp = seq->private;
1963         loff_t pos = *(loff_t *)iter_ptr;
1964         unsigned index = pos >> 3;
1965         unsigned subindex = pos & 0x07;
1966         int i;
1967
1968         if (index == 0 && subindex != 0)
1969                 return 0;
1970
1971         seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
1972                    (index == 0) ? "cpu": gfs2_stype[subindex]);
1973
1974         for_each_possible_cpu(i) {
1975                 const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);
1976
1977                 if (index == 0)
1978                         seq_printf(seq, " %15u", i);
1979                 else
1980                         seq_printf(seq, " %15llu", (unsigned long long)lkstats->
1981                                    lkstats[index - 1].stats[subindex]);
1982         }
1983         seq_putc(seq, '\n');
1984         return 0;
1985 }
1986
1987 int __init gfs2_glock_init(void)
1988 {
1989         int i, ret;
1990
1991         ret = rhashtable_init(&gl_hash_table, &ht_parms);
1992         if (ret < 0)
1993                 return ret;
1994
1995         glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1996                                           WQ_HIGHPRI | WQ_FREEZABLE, 0);
1997         if (!glock_workqueue) {
1998                 rhashtable_destroy(&gl_hash_table);
1999                 return -ENOMEM;
2000         }
2001         gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
2002                                                 WQ_MEM_RECLAIM | WQ_FREEZABLE,
2003                                                 0);
2004         if (!gfs2_delete_workqueue) {
2005                 destroy_workqueue(glock_workqueue);
2006                 rhashtable_destroy(&gl_hash_table);
2007                 return -ENOMEM;
2008         }
2009
2010         ret = register_shrinker(&glock_shrinker);
2011         if (ret) {
2012                 destroy_workqueue(gfs2_delete_workqueue);
2013                 destroy_workqueue(glock_workqueue);
2014                 rhashtable_destroy(&gl_hash_table);
2015                 return ret;
2016         }
2017
2018         for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
2019                 init_waitqueue_head(glock_wait_table + i);
2020
2021         return 0;
2022 }
2023
2024 void gfs2_glock_exit(void)
2025 {
2026         unregister_shrinker(&glock_shrinker);
2027         rhashtable_destroy(&gl_hash_table);
2028         destroy_workqueue(glock_workqueue);
2029         destroy_workqueue(gfs2_delete_workqueue);
2030 }
2031
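/*
 * Advance the iterator to the n'th next glock belonging to gi->sdp.
 * n == 0 keeps the currently held glock, n == 1 moves on to the next one,
 * and larger values skip ahead, which lets gfs2_glock_seq_start() below
 * reach an arbitrary seek position after restarting the hash table walk.
 */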
2032 static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
2033 {
2034         struct gfs2_glock *gl = gi->gl;
2035
2036         if (gl) {
2037                 if (n == 0)
2038                         return;
2039                 if (!lockref_put_not_zero(&gl->gl_lockref))
2040                         gfs2_glock_queue_put(gl);
2041         }
2042         for (;;) {
2043                 gl = rhashtable_walk_next(&gi->hti);
2044                 if (IS_ERR_OR_NULL(gl)) {
2045                         if (gl == ERR_PTR(-EAGAIN)) {
2046                                 n = 1;
2047                                 continue;
2048                         }
2049                         gl = NULL;
2050                         break;
2051                 }
2052                 if (gl->gl_name.ln_sbd != gi->sdp)
2053                         continue;
2054                 if (n <= 1) {
2055                         if (!lockref_get_not_dead(&gl->gl_lockref))
2056                                 continue;
2057                         break;
2058                 } else {
2059                         if (__lockref_is_dead(&gl->gl_lockref))
2060                                 continue;
2061                         n--;
2062                 }
2063         }
2064         gi->gl = gl;
2065 }
2066
2067 static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
2068         __acquires(RCU)
2069 {
2070         struct gfs2_glock_iter *gi = seq->private;
2071         loff_t n;
2072
2073         /*
2074          * We can either stay where we are, skip to the next hash table
2075          * entry, or start from the beginning.
2076          */
2077         if (*pos < gi->last_pos) {
2078                 rhashtable_walk_exit(&gi->hti);
2079                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2080                 n = *pos + 1;
2081         } else {
2082                 n = *pos - gi->last_pos;
2083         }
2084
2085         rhashtable_walk_start(&gi->hti);
2086
2087         gfs2_glock_iter_next(gi, n);
2088         gi->last_pos = *pos;
2089         return gi->gl;
2090 }
2091
2092 static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
2093                                  loff_t *pos)
2094 {
2095         struct gfs2_glock_iter *gi = seq->private;
2096
2097         (*pos)++;
2098         gi->last_pos = *pos;
2099         gfs2_glock_iter_next(gi, 1);
2100         return gi->gl;
2101 }
2102
2103 static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
2104         __releases(RCU)
2105 {
2106         struct gfs2_glock_iter *gi = seq->private;
2107
2108         rhashtable_walk_stop(&gi->hti);
2109 }
2110
2111 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
2112 {
2113         dump_glock(seq, iter_ptr, false);
2114         return 0;
2115 }
2116
2117 static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
2118 {
2119         preempt_disable();
2120         if (*pos >= GFS2_NR_SBSTATS)
2121                 return NULL;
2122         return pos;
2123 }
2124
2125 static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
2126                                    loff_t *pos)
2127 {
2128         (*pos)++;
2129         if (*pos >= GFS2_NR_SBSTATS)
2130                 return NULL;
2131         return pos;
2132 }
2133
2134 static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
2135 {
2136         preempt_enable();
2137 }
2138
2139 static const struct seq_operations gfs2_glock_seq_ops = {
2140         .start = gfs2_glock_seq_start,
2141         .next  = gfs2_glock_seq_next,
2142         .stop  = gfs2_glock_seq_stop,
2143         .show  = gfs2_glock_seq_show,
2144 };
2145
2146 static const struct seq_operations gfs2_glstats_seq_ops = {
2147         .start = gfs2_glock_seq_start,
2148         .next  = gfs2_glock_seq_next,
2149         .stop  = gfs2_glock_seq_stop,
2150         .show  = gfs2_glstats_seq_show,
2151 };
2152
2153 static const struct seq_operations gfs2_sbstats_seq_ops = {
2154         .start = gfs2_sbstats_seq_start,
2155         .next  = gfs2_sbstats_seq_next,
2156         .stop  = gfs2_sbstats_seq_stop,
2157         .show  = gfs2_sbstats_seq_show,
2158 };
2159
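/*
 * Preferred seq_file buffer size; with 4 KiB pages and
 * PAGE_ALLOC_COSTLY_ORDER == 3 this works out to 32 KiB, and systems with
 * larger pages are capped at 64 KiB.
 */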
2160 #define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
2161
2162 static int __gfs2_glocks_open(struct inode *inode, struct file *file,
2163                               const struct seq_operations *ops)
2164 {
2165         int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
2166         if (ret == 0) {
2167                 struct seq_file *seq = file->private_data;
2168                 struct gfs2_glock_iter *gi = seq->private;
2169
2170                 gi->sdp = inode->i_private;
2171                 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2172                 if (seq->buf)
2173                         seq->size = GFS2_SEQ_GOODSIZE;
2174                 /*
2175                  * Initially, we are "before" the first hash table entry; the
2176                  * first call to rhashtable_walk_next gets us the first entry.
2177                  */
2178                 gi->last_pos = -1;
2179                 gi->gl = NULL;
2180                 rhashtable_walk_enter(&gl_hash_table, &gi->hti);
2181         }
2182         return ret;
2183 }
2184
2185 static int gfs2_glocks_open(struct inode *inode, struct file *file)
2186 {
2187         return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
2188 }
2189
2190 static int gfs2_glocks_release(struct inode *inode, struct file *file)
2191 {
2192         struct seq_file *seq = file->private_data;
2193         struct gfs2_glock_iter *gi = seq->private;
2194
2195         if (gi->gl)
2196                 gfs2_glock_put(gi->gl);
2197         rhashtable_walk_exit(&gi->hti);
2198         return seq_release_private(inode, file);
2199 }
2200
2201 static int gfs2_glstats_open(struct inode *inode, struct file *file)
2202 {
2203         return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
2204 }
2205
2206 static int gfs2_sbstats_open(struct inode *inode, struct file *file)
2207 {
2208         int ret = seq_open(file, &gfs2_sbstats_seq_ops);
2209         if (ret == 0) {
2210                 struct seq_file *seq = file->private_data;
2211                 seq->private = inode->i_private;  /* sdp */
2212         }
2213         return ret;
2214 }
2215
2216 static const struct file_operations gfs2_glocks_fops = {
2217         .owner   = THIS_MODULE,
2218         .open    = gfs2_glocks_open,
2219         .read    = seq_read,
2220         .llseek  = seq_lseek,
2221         .release = gfs2_glocks_release,
2222 };
2223
2224 static const struct file_operations gfs2_glstats_fops = {
2225         .owner   = THIS_MODULE,
2226         .open    = gfs2_glstats_open,
2227         .read    = seq_read,
2228         .llseek  = seq_lseek,
2229         .release = gfs2_glocks_release,
2230 };
2231
2232 static const struct file_operations gfs2_sbstats_fops = {
2233         .owner   = THIS_MODULE,
2234         .open    = gfs2_sbstats_open,
2235         .read    = seq_read,
2236         .llseek  = seq_lseek,
2237         .release = seq_release,
2238 };
2239
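/*
 * With debugfs mounted in the usual place, the files created below appear
 * as /sys/kernel/debug/gfs2/<table name>/{glocks,glstats,sbstats}.
 */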
2240 void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
2241 {
2242         sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);
2243
2244         debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2245                             &gfs2_glocks_fops);
2246
2247         debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2248                             &gfs2_glstats_fops);
2249
2250         debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
2251                             &gfs2_sbstats_fops);
2252 }
2253
2254 void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
2255 {
2256         debugfs_remove_recursive(sdp->debugfs_dir);
2257         sdp->debugfs_dir = NULL;
2258 }
2259
2260 void gfs2_register_debugfs(void)
2261 {
2262         gfs2_root = debugfs_create_dir("gfs2", NULL);
2263 }
2264
2265 void gfs2_unregister_debugfs(void)
2266 {
2267         debugfs_remove(gfs2_root);
2268         gfs2_root = NULL;
2269 }