]> asedeno.scripts.mit.edu Git - linux.git/blob - fs/afs/server.c
Merge branch 'for-5.3/uclogic' into for-linus
[linux.git] / fs / afs / server.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS server record management
3  *
4  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13
/* How long (seconds) an unused server record is kept before being collected */
static unsigned int afs_server_gc_delay = 10;
/* How long (seconds) before a server's address list is looked up again */
static unsigned int afs_server_update_delay = 30;
16
17 static void afs_inc_servers_outstanding(struct afs_net *net)
18 {
19         atomic_inc(&net->servers_outstanding);
20 }
21
22 static void afs_dec_servers_outstanding(struct afs_net *net)
23 {
24         if (atomic_dec_and_test(&net->servers_outstanding))
25                 wake_up_var(&net->servers_outstanding);
26 }
27
28 /*
29  * Find a server by one of its addresses.
30  */
31 struct afs_server *afs_find_server(struct afs_net *net,
32                                    const struct sockaddr_rxrpc *srx)
33 {
34         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
35         const struct afs_addr_list *alist;
36         struct afs_server *server = NULL;
37         unsigned int i;
38         bool ipv6 = true;
39         int seq = 0, diff;
40
41         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
42             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
43             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
44                 ipv6 = false;
45
46         rcu_read_lock();
47
48         do {
49                 if (server)
50                         afs_put_server(net, server);
51                 server = NULL;
52                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
53
54                 if (ipv6) {
55                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
56                                 alist = rcu_dereference(server->addresses);
57                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
58                                         b = &alist->addrs[i].transport.sin6;
59                                         diff = ((u16 __force)a->sin6_port -
60                                                 (u16 __force)b->sin6_port);
61                                         if (diff == 0)
62                                                 diff = memcmp(&a->sin6_addr,
63                                                               &b->sin6_addr,
64                                                               sizeof(struct in6_addr));
65                                         if (diff == 0)
66                                                 goto found;
67                                 }
68                         }
69                 } else {
70                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
71                                 alist = rcu_dereference(server->addresses);
72                                 for (i = 0; i < alist->nr_ipv4; i++) {
73                                         b = &alist->addrs[i].transport.sin6;
74                                         diff = ((u16 __force)a->sin6_port -
75                                                 (u16 __force)b->sin6_port);
76                                         if (diff == 0)
77                                                 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
78                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
79                                         if (diff == 0)
80                                                 goto found;
81                                 }
82                         }
83                 }
84
85                 server = NULL;
86         found:
87                 if (server && !atomic_inc_not_zero(&server->usage))
88                         server = NULL;
89
90         } while (need_seqretry(&net->fs_addr_lock, seq));
91
92         done_seqretry(&net->fs_addr_lock, seq);
93
94         rcu_read_unlock();
95         return server;
96 }
97
98 /*
99  * Look up a server by its UUID
100  */
101 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
102 {
103         struct afs_server *server = NULL;
104         struct rb_node *p;
105         int diff, seq = 0;
106
107         _enter("%pU", uuid);
108
109         do {
110                 /* Unfortunately, rbtree walking doesn't give reliable results
111                  * under just the RCU read lock, so we have to check for
112                  * changes.
113                  */
114                 if (server)
115                         afs_put_server(net, server);
116                 server = NULL;
117
118                 read_seqbegin_or_lock(&net->fs_lock, &seq);
119
120                 p = net->fs_servers.rb_node;
121                 while (p) {
122                         server = rb_entry(p, struct afs_server, uuid_rb);
123
124                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
125                         if (diff < 0) {
126                                 p = p->rb_left;
127                         } else if (diff > 0) {
128                                 p = p->rb_right;
129                         } else {
130                                 afs_get_server(server);
131                                 break;
132                         }
133
134                         server = NULL;
135                 }
136         } while (need_seqretry(&net->fs_lock, seq));
137
138         done_seqretry(&net->fs_lock, seq);
139
140         _leave(" = %p", server);
141         return server;
142 }
143
144 /*
145  * Install a server record in the namespace tree
146  */
147 static struct afs_server *afs_install_server(struct afs_net *net,
148                                              struct afs_server *candidate)
149 {
150         const struct afs_addr_list *alist;
151         struct afs_server *server;
152         struct rb_node **pp, *p;
153         int ret = -EEXIST, diff;
154
155         _enter("%p", candidate);
156
157         write_seqlock(&net->fs_lock);
158
159         /* Firstly install the server in the UUID lookup tree */
160         pp = &net->fs_servers.rb_node;
161         p = NULL;
162         while (*pp) {
163                 p = *pp;
164                 _debug("- consider %p", p);
165                 server = rb_entry(p, struct afs_server, uuid_rb);
166                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
167                 if (diff < 0)
168                         pp = &(*pp)->rb_left;
169                 else if (diff > 0)
170                         pp = &(*pp)->rb_right;
171                 else
172                         goto exists;
173         }
174
175         server = candidate;
176         rb_link_node(&server->uuid_rb, p, pp);
177         rb_insert_color(&server->uuid_rb, &net->fs_servers);
178         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
179
180         write_seqlock(&net->fs_addr_lock);
181         alist = rcu_dereference_protected(server->addresses,
182                                           lockdep_is_held(&net->fs_addr_lock.lock));
183
184         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
185          * it in the IPv4 and/or IPv6 reverse-map lists.
186          *
187          * TODO: For speed we want to use something other than a flat list
188          * here; even sorting the list in terms of lowest address would help a
189          * bit, but anything we might want to do gets messy and memory
190          * intensive.
191          */
192         if (alist->nr_ipv4 > 0)
193                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
194         if (alist->nr_addrs > alist->nr_ipv4)
195                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
196
197         write_sequnlock(&net->fs_addr_lock);
198         ret = 0;
199
200 exists:
201         afs_get_server(server);
202         write_sequnlock(&net->fs_lock);
203         return server;
204 }
205
206 /*
207  * allocate a new server record
208  */
209 static struct afs_server *afs_alloc_server(struct afs_net *net,
210                                            const uuid_t *uuid,
211                                            struct afs_addr_list *alist)
212 {
213         struct afs_server *server;
214
215         _enter("");
216
217         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
218         if (!server)
219                 goto enomem;
220
221         atomic_set(&server->usage, 1);
222         RCU_INIT_POINTER(server->addresses, alist);
223         server->addr_version = alist->version;
224         server->uuid = *uuid;
225         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
226         rwlock_init(&server->fs_lock);
227         INIT_HLIST_HEAD(&server->cb_volumes);
228         rwlock_init(&server->cb_break_lock);
229         init_waitqueue_head(&server->probe_wq);
230         spin_lock_init(&server->probe_lock);
231
232         afs_inc_servers_outstanding(net);
233         _leave(" = %p", server);
234         return server;
235
236 enomem:
237         _leave(" = NULL [nomem]");
238         return NULL;
239 }
240
241 /*
242  * Look up an address record for a server
243  */
244 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
245                                                  struct key *key, const uuid_t *uuid)
246 {
247         struct afs_vl_cursor vc;
248         struct afs_addr_list *alist = NULL;
249         int ret;
250
251         ret = -ERESTARTSYS;
252         if (afs_begin_vlserver_operation(&vc, cell, key)) {
253                 while (afs_select_vlserver(&vc)) {
254                         if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
255                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
256                         else
257                                 alist = afs_vl_get_addrs_u(&vc, uuid);
258                 }
259
260                 ret = afs_end_vlserver_operation(&vc);
261         }
262
263         return ret < 0 ? ERR_PTR(ret) : alist;
264 }
265
266 /*
267  * Get or create a fileserver record.
268  */
269 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
270                                      const uuid_t *uuid)
271 {
272         struct afs_addr_list *alist;
273         struct afs_server *server, *candidate;
274
275         _enter("%p,%pU", cell->net, uuid);
276
277         server = afs_find_server_by_uuid(cell->net, uuid);
278         if (server)
279                 return server;
280
281         alist = afs_vl_lookup_addrs(cell, key, uuid);
282         if (IS_ERR(alist))
283                 return ERR_CAST(alist);
284
285         candidate = afs_alloc_server(cell->net, uuid, alist);
286         if (!candidate) {
287                 afs_put_addrlist(alist);
288                 return ERR_PTR(-ENOMEM);
289         }
290
291         server = afs_install_server(cell->net, candidate);
292         if (server != candidate) {
293                 afs_put_addrlist(alist);
294                 kfree(candidate);
295         }
296
297         _leave(" = %p{%d}", server, atomic_read(&server->usage));
298         return server;
299 }
300
301 /*
302  * Set the server timer to fire after a given delay, assuming it's not already
303  * set for an earlier time.
304  */
305 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
306 {
307         if (net->live) {
308                 afs_inc_servers_outstanding(net);
309                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
310                         afs_dec_servers_outstanding(net);
311         }
312 }
313
314 /*
315  * Server management timer.  We have an increment on fs_outstanding that we
316  * need to pass along to the work item.
317  */
318 void afs_servers_timer(struct timer_list *timer)
319 {
320         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
321
322         _enter("");
323         if (!queue_work(afs_wq, &net->fs_manager))
324                 afs_dec_servers_outstanding(net);
325 }
326
327 /*
328  * Release a reference on a server record.
329  */
330 void afs_put_server(struct afs_net *net, struct afs_server *server)
331 {
332         unsigned int usage;
333
334         if (!server)
335                 return;
336
337         server->put_time = ktime_get_real_seconds();
338
339         usage = atomic_dec_return(&server->usage);
340
341         _enter("{%u}", usage);
342
343         if (likely(usage > 0))
344                 return;
345
346         afs_set_server_timer(net, afs_server_gc_delay);
347 }
348
349 static void afs_server_rcu(struct rcu_head *rcu)
350 {
351         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
352
353         afs_put_addrlist(rcu_access_pointer(server->addresses));
354         kfree(server);
355 }
356
357 /*
358  * destroy a dead server
359  */
360 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
361 {
362         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
363         struct afs_addr_cursor ac = {
364                 .alist  = alist,
365                 .index  = alist->preferred,
366                 .error  = 0,
367         };
368         _enter("%p", server);
369
370         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
371                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
372
373         wait_var_event(&server->probe_outstanding,
374                        atomic_read(&server->probe_outstanding) == 0);
375
376         call_rcu(&server->rcu, afs_server_rcu);
377         afs_dec_servers_outstanding(net);
378 }
379
380 /*
381  * Garbage collect any expired servers.
382  */
383 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
384 {
385         struct afs_server *server;
386         bool deleted;
387         int usage;
388
389         while ((server = gc_list)) {
390                 gc_list = server->gc_next;
391
392                 write_seqlock(&net->fs_lock);
393                 usage = 1;
394                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
395                 if (deleted) {
396                         rb_erase(&server->uuid_rb, &net->fs_servers);
397                         hlist_del_rcu(&server->proc_link);
398                 }
399                 write_sequnlock(&net->fs_lock);
400
401                 if (deleted) {
402                         write_seqlock(&net->fs_addr_lock);
403                         if (!hlist_unhashed(&server->addr4_link))
404                                 hlist_del_rcu(&server->addr4_link);
405                         if (!hlist_unhashed(&server->addr6_link))
406                                 hlist_del_rcu(&server->addr6_link);
407                         write_sequnlock(&net->fs_addr_lock);
408                         afs_destroy_server(net, server);
409                 }
410         }
411 }
412
413 /*
414  * Manage the records of servers known to be within a network namespace.  This
415  * includes garbage collecting unused servers.
416  *
417  * Note also that we were given an increment on net->servers_outstanding by
418  * whoever queued us that we need to deal with before returning.
419  */
420 void afs_manage_servers(struct work_struct *work)
421 {
422         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
423         struct afs_server *gc_list = NULL;
424         struct rb_node *cursor;
425         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
426         bool purging = !net->live;
427
428         _enter("");
429
430         /* Trawl the server list looking for servers that have expired from
431          * lack of use.
432          */
433         read_seqlock_excl(&net->fs_lock);
434
435         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
436                 struct afs_server *server =
437                         rb_entry(cursor, struct afs_server, uuid_rb);
438                 int usage = atomic_read(&server->usage);
439
440                 _debug("manage %pU %u", &server->uuid, usage);
441
442                 ASSERTCMP(usage, >=, 1);
443                 ASSERTIFCMP(purging, usage, ==, 1);
444
445                 if (usage == 1) {
446                         time64_t expire_at = server->put_time;
447
448                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
449                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
450                                 expire_at += afs_server_gc_delay;
451                         if (purging || expire_at <= now) {
452                                 server->gc_next = gc_list;
453                                 gc_list = server;
454                         } else if (expire_at < next_manage) {
455                                 next_manage = expire_at;
456                         }
457                 }
458         }
459
460         read_sequnlock_excl(&net->fs_lock);
461
462         /* Update the timer on the way out.  We have to pass an increment on
463          * servers_outstanding in the namespace that we are in to the timer or
464          * the work scheduler.
465          */
466         if (!purging && next_manage < TIME64_MAX) {
467                 now = ktime_get_real_seconds();
468
469                 if (next_manage - now <= 0) {
470                         if (queue_work(afs_wq, &net->fs_manager))
471                                 afs_inc_servers_outstanding(net);
472                 } else {
473                         afs_set_server_timer(net, next_manage - now);
474                 }
475         }
476
477         afs_gc_servers(net, gc_list);
478
479         afs_dec_servers_outstanding(net);
480         _leave(" [%d]", atomic_read(&net->servers_outstanding));
481 }
482
483 static void afs_queue_server_manager(struct afs_net *net)
484 {
485         afs_inc_servers_outstanding(net);
486         if (!queue_work(afs_wq, &net->fs_manager))
487                 afs_dec_servers_outstanding(net);
488 }
489
490 /*
491  * Purge list of servers.
492  */
493 void afs_purge_servers(struct afs_net *net)
494 {
495         _enter("");
496
497         if (del_timer_sync(&net->fs_timer))
498                 atomic_dec(&net->servers_outstanding);
499
500         afs_queue_server_manager(net);
501
502         _debug("wait");
503         wait_var_event(&net->servers_outstanding,
504                        !atomic_read(&net->servers_outstanding));
505         _leave("");
506 }
507
508 /*
509  * Get an update for a server's address list.
510  */
511 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
512 {
513         struct afs_addr_list *alist, *discard;
514
515         _enter("");
516
517         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
518                                     &server->uuid);
519         if (IS_ERR(alist)) {
520                 if ((PTR_ERR(alist) == -ERESTARTSYS ||
521                      PTR_ERR(alist) == -EINTR) &&
522                     !(fc->flags & AFS_FS_CURSOR_INTR) &&
523                     server->addresses) {
524                         _leave(" = t [intr]");
525                         return true;
526                 }
527                 fc->error = PTR_ERR(alist);
528                 _leave(" = f [%d]", fc->error);
529                 return false;
530         }
531
532         discard = alist;
533         if (server->addr_version != alist->version) {
534                 write_lock(&server->fs_lock);
535                 discard = rcu_dereference_protected(server->addresses,
536                                                     lockdep_is_held(&server->fs_lock));
537                 rcu_assign_pointer(server->addresses, alist);
538                 server->addr_version = alist->version;
539                 write_unlock(&server->fs_lock);
540         }
541
542         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
543         afs_put_addrlist(discard);
544         _leave(" = t");
545         return true;
546 }
547
548 /*
549  * See if a server's address list needs updating.
550  */
551 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
552 {
553         time64_t now = ktime_get_real_seconds();
554         long diff;
555         bool success;
556         int ret, retries = 0;
557
558         _enter("");
559
560         ASSERT(server);
561
562 retry:
563         diff = READ_ONCE(server->update_at) - now;
564         if (diff > 0) {
565                 _leave(" = t [not now %ld]", diff);
566                 return true;
567         }
568
569         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
570                 success = afs_update_server_record(fc, server);
571                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
572                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
573                 _leave(" = %d", success);
574                 return success;
575         }
576
577         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
578                           TASK_INTERRUPTIBLE);
579         if (ret == -ERESTARTSYS) {
580                 if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
581                         _leave(" = t [intr]");
582                         return true;
583                 }
584                 fc->error = ret;
585                 _leave(" = f [intr]");
586                 return false;
587         }
588
589         retries++;
590         if (retries == 4) {
591                 _leave(" = f [stale]");
592                 ret = -ESTALE;
593                 return false;
594         }
595         goto retry;
596 }