fs/afs/server.c
// SPDX-License-Identifier: GPL-2.0-or-later
/* AFS server record management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "afs_fs.h"
#include "internal.h"
#include "protocol_yfs.h"

static unsigned afs_server_gc_delay = 10;       /* Server record timeout in seconds */
static unsigned afs_server_update_delay = 30;   /* Time till VLDB recheck in secs */
static atomic_t afs_server_debug_id;

static void afs_inc_servers_outstanding(struct afs_net *net)
{
        atomic_inc(&net->servers_outstanding);
}

static void afs_dec_servers_outstanding(struct afs_net *net)
{
        if (atomic_dec_and_test(&net->servers_outstanding))
                wake_up_var(&net->servers_outstanding);
}
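
/*
 * net->servers_outstanding counts things that must complete before the
 * namespace can be torn down: each allocated server record (the count is
 * dropped in afs_destroy_server()), each queued run of the fs_manager work
 * item and each pending fs_timer holds one count.  afs_purge_servers() waits
 * for the counter to drain to zero.
 *
 * The usual pairing looks like this (see afs_queue_server_manager() below):
 *
 *	afs_inc_servers_outstanding(net);
 *	if (!queue_work(afs_wq, &net->fs_manager))
 *		afs_dec_servers_outstanding(net);
 */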

/*
 * Find a server by one of its addresses.
 */
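/*
 * The lookup below runs the usual lockless-then-locked seqlock pattern:
 *
 *	seq = 0;
 *	do {
 *		read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
 *		... search the RCU-protected lists ...
 *	} while (need_seqretry(&net->fs_addr_lock, seq));
 *	done_seqretry(&net->fs_addr_lock, seq);
 *
 * The first pass is a lockless read; if a writer raced with it,
 * need_seqretry() makes the second pass take the lock exclusively.
 * atomic_inc_not_zero() is used on the result so that a server whose
 * refcount has already reached zero cannot be resurrected.
 */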
struct afs_server *afs_find_server(struct afs_net *net,
                                   const struct sockaddr_rxrpc *srx)
{
        const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
        const struct afs_addr_list *alist;
        struct afs_server *server = NULL;
        unsigned int i;
        bool ipv6 = true;
        int seq = 0, diff;

        /* Treat an IPv4-mapped address (::ffff:a.b.c.d) as IPv4 so that it is
         * matched against the IPv4 reverse-map list below.
         */
        if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 &&
            srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 &&
            srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
                ipv6 = false;

        rcu_read_lock();

        do {
                if (server)
                        afs_put_server(net, server, afs_server_trace_put_find_rsq);
                server = NULL;
                read_seqbegin_or_lock(&net->fs_addr_lock, &seq);

                if (ipv6) {
                        hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
                                        b = &alist->addrs[i].transport.sin6;
                                        diff = ((u16 __force)a->sin6_port -
                                                (u16 __force)b->sin6_port);
                                        if (diff == 0)
                                                diff = memcmp(&a->sin6_addr,
                                                              &b->sin6_addr,
                                                              sizeof(struct in6_addr));
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                } else {
                        hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
                                alist = rcu_dereference(server->addresses);
                                for (i = 0; i < alist->nr_ipv4; i++) {
                                        b = &alist->addrs[i].transport.sin6;
                                        diff = ((u16 __force)a->sin6_port -
                                                (u16 __force)b->sin6_port);
                                        if (diff == 0)
                                                diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
                                                        (u32 __force)b->sin6_addr.s6_addr32[3]);
                                        if (diff == 0)
                                                goto found;
                                }
                        }
                }

                server = NULL;
        found:
                if (server && !atomic_inc_not_zero(&server->usage))
                        server = NULL;

        } while (need_seqretry(&net->fs_addr_lock, seq));

        done_seqretry(&net->fs_addr_lock, seq);

        rcu_read_unlock();
        return server;
}

/*
 * Look up a server by its UUID
 */
struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
{
        struct afs_server *server = NULL;
        struct rb_node *p;
        int diff, seq = 0;

        _enter("%pU", uuid);

        do {
                /* Unfortunately, rbtree walking doesn't give reliable results
                 * under just the RCU read lock, so we have to check for
                 * changes.
                 */
                if (server)
                        afs_put_server(net, server, afs_server_trace_put_uuid_rsq);
                server = NULL;

                read_seqbegin_or_lock(&net->fs_lock, &seq);

                p = net->fs_servers.rb_node;
                while (p) {
                        server = rb_entry(p, struct afs_server, uuid_rb);

                        diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
                        if (diff < 0) {
                                p = p->rb_left;
                        } else if (diff > 0) {
                                p = p->rb_right;
                        } else {
                                afs_get_server(server, afs_server_trace_get_by_uuid);
                                break;
                        }

                        server = NULL;
                }
        } while (need_seqretry(&net->fs_lock, seq));

        done_seqretry(&net->fs_lock, seq);

        _leave(" = %p", server);
        return server;
}

/*
 * Install a server record in the namespace tree
 */
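/*
 * The caller passes in a candidate record that it has allocated; it is only
 * linked in if no record with the same UUID is already present.  Either way,
 * the record returned carries an extra ref for the caller, who must discard
 * an uninstalled candidate itself.
 *
 * net->fs_lock (a seqlock) guards the UUID rbtree and the proc list;
 * net->fs_addr_lock guards the IPv4/IPv6 reverse-map lists and nests inside
 * it here.
 */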
static struct afs_server *afs_install_server(struct afs_net *net,
                                             struct afs_server *candidate)
{
        const struct afs_addr_list *alist;
        struct afs_server *server;
        struct rb_node **pp, *p;
        int diff;

        _enter("%p", candidate);

        write_seqlock(&net->fs_lock);

        /* Firstly install the server in the UUID lookup tree */
        pp = &net->fs_servers.rb_node;
        p = NULL;
        while (*pp) {
                p = *pp;
                _debug("- consider %p", p);
                server = rb_entry(p, struct afs_server, uuid_rb);
                diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
                if (diff < 0)
                        pp = &(*pp)->rb_left;
                else if (diff > 0)
                        pp = &(*pp)->rb_right;
                else
                        goto exists;
        }

        server = candidate;
        rb_link_node(&server->uuid_rb, p, pp);
        rb_insert_color(&server->uuid_rb, &net->fs_servers);
        hlist_add_head_rcu(&server->proc_link, &net->fs_proc);

        write_seqlock(&net->fs_addr_lock);
        alist = rcu_dereference_protected(server->addresses,
                                          lockdep_is_held(&net->fs_addr_lock.lock));

        /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
         * it in the IPv4 and/or IPv6 reverse-map lists.
         *
         * TODO: For speed we want to use something other than a flat list
         * here; even sorting the list in terms of lowest address would help a
         * bit, but anything we might want to do gets messy and memory
         * intensive.
         */
        if (alist->nr_ipv4 > 0)
                hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
        if (alist->nr_addrs > alist->nr_ipv4)
                hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);

        write_sequnlock(&net->fs_addr_lock);

exists:
        afs_get_server(server, afs_server_trace_get_install);
        write_sequnlock(&net->fs_lock);
        return server;
}

/*
 * Allocate a new server record.
 */
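/*
 * The new record starts with a usage count of one, which effectively becomes
 * the rbtree's reference once afs_install_server() links it in; it also takes
 * over the caller's reference on the address list.  update_at is set so that
 * the VLDB will be reconsulted after afs_server_update_delay seconds.
 */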
static struct afs_server *afs_alloc_server(struct afs_net *net,
                                           const uuid_t *uuid,
                                           struct afs_addr_list *alist)
{
        struct afs_server *server;

        _enter("");

        server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
        if (!server)
                goto enomem;

        atomic_set(&server->usage, 1);
        server->debug_id = atomic_inc_return(&afs_server_debug_id);
        RCU_INIT_POINTER(server->addresses, alist);
        server->addr_version = alist->version;
        server->uuid = *uuid;
        server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
        rwlock_init(&server->fs_lock);
        INIT_HLIST_HEAD(&server->cb_volumes);
        rwlock_init(&server->cb_break_lock);
        init_waitqueue_head(&server->probe_wq);
        spin_lock_init(&server->probe_lock);

        afs_inc_servers_outstanding(net);
        trace_afs_server(server, 1, afs_server_trace_alloc);
        _leave(" = %p", server);
        return server;

enomem:
        _leave(" = NULL [nomem]");
        return NULL;
}

/*
 * Look up an address record for a server
 */
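/*
 * The VL cursor below walks the cell's VL servers until one of them answers:
 * servers flagged as YFS-capable are asked via afs_yfsvl_get_endpoints(),
 * others via afs_vl_get_addrs_u().  If the operation cannot even be started,
 * -ERESTARTSYS is returned; other errors come from the cursor when it is
 * wound up.
 */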
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                 struct key *key, const uuid_t *uuid)
{
        struct afs_vl_cursor vc;
        struct afs_addr_list *alist = NULL;
        int ret;

        ret = -ERESTARTSYS;
        if (afs_begin_vlserver_operation(&vc, cell, key)) {
                while (afs_select_vlserver(&vc)) {
                        if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
                                alist = afs_yfsvl_get_endpoints(&vc, uuid);
                        else
                                alist = afs_vl_get_addrs_u(&vc, uuid);
                }

                ret = afs_end_vlserver_operation(&vc);
        }

        return ret < 0 ? ERR_PTR(ret) : alist;
}

/*
 * Get or create a fileserver record.
 */
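/*
 * Fast path: the UUID is already known and we just take a ref.  Slow path:
 * ask the VL service for the server's addresses, allocate a candidate and try
 * to install it; if another thread installed the same UUID first, the
 * candidate and its address list are discarded and the existing record is
 * returned instead.
 *
 * A caller would typically do something like this (sketch; the trace reason
 * passed to afs_put_server() depends on the call site):
 *
 *	server = afs_lookup_server(cell, key, &uuid);
 *	if (IS_ERR(server))
 *		return PTR_ERR(server);
 *	... use server ...
 *	afs_put_server(cell->net, server, <trace reason>);
 */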
struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
                                     const uuid_t *uuid)
{
        struct afs_addr_list *alist;
        struct afs_server *server, *candidate;

        _enter("%p,%pU", cell->net, uuid);

        server = afs_find_server_by_uuid(cell->net, uuid);
        if (server)
                return server;

        alist = afs_vl_lookup_addrs(cell, key, uuid);
        if (IS_ERR(alist))
                return ERR_CAST(alist);

        candidate = afs_alloc_server(cell->net, uuid, alist);
        if (!candidate) {
                afs_put_addrlist(alist);
                return ERR_PTR(-ENOMEM);
        }

        server = afs_install_server(cell->net, candidate);
        if (server != candidate) {
                afs_put_addrlist(alist);
                kfree(candidate);
        }

        _leave(" = %p{%d}", server, atomic_read(&server->usage));
        return server;
}

/*
 * Set the server timer to fire after a given delay, assuming it's not already
 * set for an earlier time.
 */
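/*
 * An armed fs_timer holds one count on servers_outstanding, which
 * afs_servers_timer() passes on to the manager work item when it fires.
 * timer_reduce() only ever brings the expiry forward and returns nonzero if
 * the timer was already pending, in which case that pending timer already
 * holds a count and the one just taken is dropped again.
 */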
static void afs_set_server_timer(struct afs_net *net, time64_t delay)
{
        if (net->live) {
                afs_inc_servers_outstanding(net);
                if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
                        afs_dec_servers_outstanding(net);
        }
}

/*
 * Server management timer.  We have an increment on servers_outstanding that
 * we need to pass along to the work item.
 */
void afs_servers_timer(struct timer_list *timer)
{
        struct afs_net *net = container_of(timer, struct afs_net, fs_timer);

        _enter("");
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Get a reference on a server object.
 */
struct afs_server *afs_get_server(struct afs_server *server,
                                  enum afs_server_trace reason)
{
        unsigned int u = atomic_inc_return(&server->usage);

        trace_afs_server(server, u, reason);
        return server;
}

/*
 * Release a reference on a server record.
 */
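/*
 * Dropping a reference never frees the record directly: the put time is
 * noted and, if the count hits zero, the GC timer is kicked.  The actual
 * teardown is done by the manager work item (afs_gc_servers()) and then by
 * afs_server_rcu() after an RCU grace period, so that lockless lookups still
 * walking the lists always see a valid structure.
 */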
void afs_put_server(struct afs_net *net, struct afs_server *server,
                    enum afs_server_trace reason)
{
        unsigned int usage;

        if (!server)
                return;

        server->put_time = ktime_get_real_seconds();

        usage = atomic_dec_return(&server->usage);

        trace_afs_server(server, usage, reason);

        if (likely(usage > 0))
                return;

        afs_set_server_timer(net, afs_server_gc_delay);
}

static void afs_server_rcu(struct rcu_head *rcu)
{
        struct afs_server *server = container_of(rcu, struct afs_server, rcu);

        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_free);
        afs_put_addrlist(rcu_access_pointer(server->addresses));
        kfree(server);
}

/*
 * Destroy a dead server.
 */
static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
{
        struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
        struct afs_addr_cursor ac = {
                .alist  = alist,
                .index  = alist->preferred,
                .error  = 0,
        };

        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_give_up_cb);

        if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
                afs_fs_give_up_all_callbacks(net, server, &ac, NULL);

        wait_var_event(&server->probe_outstanding,
                       atomic_read(&server->probe_outstanding) == 0);

        trace_afs_server(server, atomic_read(&server->usage),
                         afs_server_trace_destroy);
        call_rcu(&server->rcu, afs_server_rcu);
        afs_dec_servers_outstanding(net);
}

/*
 * Garbage collect any expired servers.
 */
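/*
 * A server is only reaped if its usage count can be moved from 1 (i.e. only
 * the tree's reference remains) to 0 whilst fs_lock is write-held.  A
 * concurrent afs_find_server() either bumps the count first, making the
 * cmpxchg fail so the server is skipped this time round, or sees the count
 * already at 0 and refuses to resurrect it.
 */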
static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
{
        struct afs_server *server;
        bool deleted;
        int usage;

        while ((server = gc_list)) {
                gc_list = server->gc_next;

                write_seqlock(&net->fs_lock);
                usage = 1;
                deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
                trace_afs_server(server, usage, afs_server_trace_gc);
                if (deleted) {
                        rb_erase(&server->uuid_rb, &net->fs_servers);
                        hlist_del_rcu(&server->proc_link);
                }
                write_sequnlock(&net->fs_lock);

                if (deleted) {
                        write_seqlock(&net->fs_addr_lock);
                        if (!hlist_unhashed(&server->addr4_link))
                                hlist_del_rcu(&server->addr4_link);
                        if (!hlist_unhashed(&server->addr6_link))
                                hlist_del_rcu(&server->addr6_link);
                        write_sequnlock(&net->fs_addr_lock);
                        afs_destroy_server(net, server);
                }
        }
}

/*
 * Manage the records of servers known to be within a network namespace.  This
 * includes garbage collecting unused servers.
 *
 * Note that whoever queued us took an increment on net->servers_outstanding
 * on our behalf, which we must dispose of before returning.
 */
void afs_manage_servers(struct work_struct *work)
{
        struct afs_net *net = container_of(work, struct afs_net, fs_manager);
        struct afs_server *gc_list = NULL;
        struct rb_node *cursor;
        time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
        bool purging = !net->live;

        _enter("");

        /* Trawl the server list looking for servers that have expired from
         * lack of use.
         */
        read_seqlock_excl(&net->fs_lock);

        for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
                struct afs_server *server =
                        rb_entry(cursor, struct afs_server, uuid_rb);
                int usage = atomic_read(&server->usage);

                _debug("manage %pU %u", &server->uuid, usage);

                ASSERTCMP(usage, >=, 1);
                ASSERTIFCMP(purging, usage, ==, 1);

                if (usage == 1) {
                        time64_t expire_at = server->put_time;

                        if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
                            !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
                                expire_at += afs_server_gc_delay;
                        if (purging || expire_at <= now) {
                                server->gc_next = gc_list;
                                gc_list = server;
                        } else if (expire_at < next_manage) {
                                next_manage = expire_at;
                        }
                }
        }

        read_sequnlock_excl(&net->fs_lock);

        /* Update the timer on the way out.  We have to pass an increment on
         * servers_outstanding in the namespace that we are in to the timer or
         * the work scheduler.
         */
        if (!purging && next_manage < TIME64_MAX) {
                now = ktime_get_real_seconds();

                if (next_manage - now <= 0) {
                        if (queue_work(afs_wq, &net->fs_manager))
                                afs_inc_servers_outstanding(net);
                } else {
                        afs_set_server_timer(net, next_manage - now);
                }
        }

        afs_gc_servers(net, gc_list);

        afs_dec_servers_outstanding(net);
        _leave(" [%d]", atomic_read(&net->servers_outstanding));
}

static void afs_queue_server_manager(struct afs_net *net)
{
        afs_inc_servers_outstanding(net);
        if (!queue_work(afs_wq, &net->fs_manager))
                afs_dec_servers_outstanding(net);
}

/*
 * Purge list of servers.
 */
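/*
 * This is expected to run once net->live has been cleared on the way out: the
 * timer is killed (dropping the count it held if it was pending), one final
 * manager run is queued - which now treats every server as expired - and then
 * we wait for servers_outstanding to drain.  The memory for each record is
 * freed later still, by afs_server_rcu() after an RCU grace period.
 */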
void afs_purge_servers(struct afs_net *net)
{
        _enter("");

        if (del_timer_sync(&net->fs_timer))
                atomic_dec(&net->servers_outstanding);

        afs_queue_server_manager(net);

        _debug("wait");
        wait_var_event(&net->servers_outstanding,
                       !atomic_read(&net->servers_outstanding));
        _leave("");
}

/*
 * Get an update for a server's address list.
 */
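/*
 * The address list is re-fetched from the VL service and, if the VLDB version
 * has changed, swapped in under the server's fs_lock; readers dereference
 * server->addresses under RCU and so see either the old or the new list.  An
 * interrupted fetch is tolerated for uninterruptible cursors as long as an
 * old list is still there to fall back on.
 */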
static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        struct afs_addr_list *alist, *discard;

        _enter("");

        trace_afs_server(server, atomic_read(&server->usage), afs_server_trace_update);

        alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
                                    &server->uuid);
        if (IS_ERR(alist)) {
                if ((PTR_ERR(alist) == -ERESTARTSYS ||
                     PTR_ERR(alist) == -EINTR) &&
                    !(fc->flags & AFS_FS_CURSOR_INTR) &&
                    server->addresses) {
                        _leave(" = t [intr]");
                        return true;
                }
                fc->error = PTR_ERR(alist);
                _leave(" = f [%d]", fc->error);
                return false;
        }

        discard = alist;
        if (server->addr_version != alist->version) {
                write_lock(&server->fs_lock);
                discard = rcu_dereference_protected(server->addresses,
                                                    lockdep_is_held(&server->fs_lock));
                rcu_assign_pointer(server->addresses, alist);
                server->addr_version = alist->version;
                write_unlock(&server->fs_lock);
        }

        server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
        afs_put_addrlist(discard);
        _leave(" = t");
        return true;
}

/*
 * See if a server's address list needs updating.
 */
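/*
 * Only one task performs the update at a time: whoever wins
 * test_and_set_bit_lock() on AFS_SERVER_FL_UPDATING does the work, while
 * everyone else sleeps in wait_on_bit() until the bit is cleared and then
 * re-checks update_at.  An interrupted wait falls back to the existing
 * address list unless the cursor has been marked interruptible.
 */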
bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
{
        time64_t now = ktime_get_real_seconds();
        long diff;
        bool success;
        int ret, retries = 0;

        _enter("");

        ASSERT(server);

retry:
        diff = READ_ONCE(server->update_at) - now;
        if (diff > 0) {
                _leave(" = t [not now %ld]", diff);
                return true;
        }

        if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
                success = afs_update_server_record(fc, server);
                clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
                wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
                _leave(" = %d", success);
                return success;
        }

        ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
                          TASK_INTERRUPTIBLE);
        if (ret == -ERESTARTSYS) {
                if (!(fc->flags & AFS_FS_CURSOR_INTR) && server->addresses) {
                        _leave(" = t [intr]");
                        return true;
                }
                fc->error = ret;
                _leave(" = f [intr]");
                return false;
        }

        retries++;
        if (retries == 4) {
                /* Report the failure to the caller rather than discarding it. */
                fc->error = -ESTALE;
                _leave(" = f [stale]");
                return false;
        }
        goto retry;
}