]> asedeno.scripts.mit.edu Git - linux.git/blob - fs/afs/server.c
aa35cfae544001050e567bc0dbac2014b56d57cb
[linux.git] / fs / afs / server.c
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16 #include "protocol_yfs.h"
17
/* Seconds an unused server record is kept before it may be garbage collected. */
static unsigned int afs_server_gc_delay = 10;

/* Seconds between rechecks of a server's address list with the VL servers. */
static unsigned int afs_server_update_delay = 30;
20
21 static void afs_inc_servers_outstanding(struct afs_net *net)
22 {
23         atomic_inc(&net->servers_outstanding);
24 }
25
26 static void afs_dec_servers_outstanding(struct afs_net *net)
27 {
28         if (atomic_dec_and_test(&net->servers_outstanding))
29                 wake_up_var(&net->servers_outstanding);
30 }
31
32 /*
33  * Find a server by one of its addresses.
34  */
35 struct afs_server *afs_find_server(struct afs_net *net,
36                                    const struct sockaddr_rxrpc *srx)
37 {
38         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
39         const struct afs_addr_list *alist;
40         struct afs_server *server = NULL;
41         unsigned int i;
42         bool ipv6 = true;
43         int seq = 0, diff;
44
45         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
46             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
47             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
48                 ipv6 = false;
49
50         rcu_read_lock();
51
52         do {
53                 if (server)
54                         afs_put_server(net, server);
55                 server = NULL;
56                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
57
58                 if (ipv6) {
59                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
60                                 alist = rcu_dereference(server->addresses);
61                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
62                                         b = &alist->addrs[i].transport.sin6;
63                                         diff = ((u16 __force)a->sin6_port -
64                                                 (u16 __force)b->sin6_port);
65                                         if (diff == 0)
66                                                 diff = memcmp(&a->sin6_addr,
67                                                               &b->sin6_addr,
68                                                               sizeof(struct in6_addr));
69                                         if (diff == 0)
70                                                 goto found;
71                                 }
72                         }
73                 } else {
74                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
75                                 alist = rcu_dereference(server->addresses);
76                                 for (i = 0; i < alist->nr_ipv4; i++) {
77                                         b = &alist->addrs[i].transport.sin6;
78                                         diff = ((u16 __force)a->sin6_port -
79                                                 (u16 __force)b->sin6_port);
80                                         if (diff == 0)
81                                                 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
82                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
83                                         if (diff == 0)
84                                                 goto found;
85                                 }
86                         }
87                 }
88
89                 server = NULL;
90         found:
91                 if (server && !atomic_inc_not_zero(&server->usage))
92                         server = NULL;
93
94         } while (need_seqretry(&net->fs_addr_lock, seq));
95
96         done_seqretry(&net->fs_addr_lock, seq);
97
98         rcu_read_unlock();
99         return server;
100 }
101
102 /*
103  * Look up a server by its UUID
104  */
105 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
106 {
107         struct afs_server *server = NULL;
108         struct rb_node *p;
109         int diff, seq = 0;
110
111         _enter("%pU", uuid);
112
113         do {
114                 /* Unfortunately, rbtree walking doesn't give reliable results
115                  * under just the RCU read lock, so we have to check for
116                  * changes.
117                  */
118                 if (server)
119                         afs_put_server(net, server);
120                 server = NULL;
121
122                 read_seqbegin_or_lock(&net->fs_lock, &seq);
123
124                 p = net->fs_servers.rb_node;
125                 while (p) {
126                         server = rb_entry(p, struct afs_server, uuid_rb);
127
128                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
129                         if (diff < 0) {
130                                 p = p->rb_left;
131                         } else if (diff > 0) {
132                                 p = p->rb_right;
133                         } else {
134                                 afs_get_server(server);
135                                 break;
136                         }
137
138                         server = NULL;
139                 }
140         } while (need_seqretry(&net->fs_lock, seq));
141
142         done_seqretry(&net->fs_lock, seq);
143
144         _leave(" = %p", server);
145         return server;
146 }
147
148 /*
149  * Install a server record in the namespace tree
150  */
151 static struct afs_server *afs_install_server(struct afs_net *net,
152                                              struct afs_server *candidate)
153 {
154         const struct afs_addr_list *alist;
155         struct afs_server *server;
156         struct rb_node **pp, *p;
157         int ret = -EEXIST, diff;
158
159         _enter("%p", candidate);
160
161         write_seqlock(&net->fs_lock);
162
163         /* Firstly install the server in the UUID lookup tree */
164         pp = &net->fs_servers.rb_node;
165         p = NULL;
166         while (*pp) {
167                 p = *pp;
168                 _debug("- consider %p", p);
169                 server = rb_entry(p, struct afs_server, uuid_rb);
170                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
171                 if (diff < 0)
172                         pp = &(*pp)->rb_left;
173                 else if (diff > 0)
174                         pp = &(*pp)->rb_right;
175                 else
176                         goto exists;
177         }
178
179         server = candidate;
180         rb_link_node(&server->uuid_rb, p, pp);
181         rb_insert_color(&server->uuid_rb, &net->fs_servers);
182         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
183
184         write_seqlock(&net->fs_addr_lock);
185         alist = rcu_dereference_protected(server->addresses,
186                                           lockdep_is_held(&net->fs_addr_lock.lock));
187
188         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
189          * it in the IPv4 and/or IPv6 reverse-map lists.
190          *
191          * TODO: For speed we want to use something other than a flat list
192          * here; even sorting the list in terms of lowest address would help a
193          * bit, but anything we might want to do gets messy and memory
194          * intensive.
195          */
196         if (alist->nr_ipv4 > 0)
197                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
198         if (alist->nr_addrs > alist->nr_ipv4)
199                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
200
201         write_sequnlock(&net->fs_addr_lock);
202         ret = 0;
203
204 exists:
205         afs_get_server(server);
206         write_sequnlock(&net->fs_lock);
207         return server;
208 }
209
210 /*
211  * allocate a new server record
212  */
213 static struct afs_server *afs_alloc_server(struct afs_net *net,
214                                            const uuid_t *uuid,
215                                            struct afs_addr_list *alist)
216 {
217         struct afs_server *server;
218
219         _enter("");
220
221         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
222         if (!server)
223                 goto enomem;
224
225         atomic_set(&server->usage, 1);
226         RCU_INIT_POINTER(server->addresses, alist);
227         server->addr_version = alist->version;
228         server->uuid = *uuid;
229         server->flags = (1UL << AFS_SERVER_FL_NEW);
230         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
231         rwlock_init(&server->fs_lock);
232         INIT_HLIST_HEAD(&server->cb_volumes);
233         rwlock_init(&server->cb_break_lock);
234
235         afs_inc_servers_outstanding(net);
236         _leave(" = %p", server);
237         return server;
238
239 enomem:
240         _leave(" = NULL [nomem]");
241         return NULL;
242 }
243
244 /*
245  * Look up an address record for a server
246  */
247 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
248                                                  struct key *key, const uuid_t *uuid)
249 {
250         struct afs_vl_cursor vc;
251         struct afs_addr_list *alist = NULL;
252         int ret;
253
254         ret = -ERESTARTSYS;
255         if (afs_begin_vlserver_operation(&vc, cell, key)) {
256                 while (afs_select_vlserver(&vc)) {
257                         if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
258                                 alist = afs_yfsvl_get_endpoints(&vc, uuid);
259                         else
260                                 alist = afs_vl_get_addrs_u(&vc, uuid);
261                 }
262
263                 ret = afs_end_vlserver_operation(&vc);
264         }
265
266         return ret < 0 ? ERR_PTR(ret) : alist;
267 }
268
269 /*
270  * Get or create a fileserver record.
271  */
272 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
273                                      const uuid_t *uuid)
274 {
275         struct afs_addr_list *alist;
276         struct afs_server *server, *candidate;
277
278         _enter("%p,%pU", cell->net, uuid);
279
280         server = afs_find_server_by_uuid(cell->net, uuid);
281         if (server)
282                 return server;
283
284         alist = afs_vl_lookup_addrs(cell, key, uuid);
285         if (IS_ERR(alist))
286                 return ERR_CAST(alist);
287
288         candidate = afs_alloc_server(cell->net, uuid, alist);
289         if (!candidate) {
290                 afs_put_addrlist(alist);
291                 return ERR_PTR(-ENOMEM);
292         }
293
294         server = afs_install_server(cell->net, candidate);
295         if (server != candidate) {
296                 afs_put_addrlist(alist);
297                 kfree(candidate);
298         }
299
300         _leave(" = %p{%d}", server, atomic_read(&server->usage));
301         return server;
302 }
303
304 /*
305  * Set the server timer to fire after a given delay, assuming it's not already
306  * set for an earlier time.
307  */
308 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
309 {
310         if (net->live) {
311                 afs_inc_servers_outstanding(net);
312                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
313                         afs_dec_servers_outstanding(net);
314         }
315 }
316
317 /*
318  * Server management timer.  We have an increment on fs_outstanding that we
319  * need to pass along to the work item.
320  */
321 void afs_servers_timer(struct timer_list *timer)
322 {
323         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
324
325         _enter("");
326         if (!queue_work(afs_wq, &net->fs_manager))
327                 afs_dec_servers_outstanding(net);
328 }
329
330 /*
331  * Release a reference on a server record.
332  */
333 void afs_put_server(struct afs_net *net, struct afs_server *server)
334 {
335         unsigned int usage;
336
337         if (!server)
338                 return;
339
340         server->put_time = ktime_get_real_seconds();
341
342         usage = atomic_dec_return(&server->usage);
343
344         _enter("{%u}", usage);
345
346         if (likely(usage > 0))
347                 return;
348
349         afs_set_server_timer(net, afs_server_gc_delay);
350 }
351
352 static void afs_server_rcu(struct rcu_head *rcu)
353 {
354         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
355
356         afs_put_addrlist(rcu_access_pointer(server->addresses));
357         kfree(server);
358 }
359
360 /*
361  * destroy a dead server
362  */
363 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
364 {
365         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
366         struct afs_addr_cursor ac = {
367                 .alist  = alist,
368                 .start  = alist->index,
369                 .index  = 0,
370                 .addr   = &alist->addrs[alist->index],
371                 .error  = 0,
372         };
373         _enter("%p", server);
374
375         if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
376                 afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
377
378         call_rcu(&server->rcu, afs_server_rcu);
379         afs_dec_servers_outstanding(net);
380 }
381
382 /*
383  * Garbage collect any expired servers.
384  */
385 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
386 {
387         struct afs_server *server;
388         bool deleted;
389         int usage;
390
391         while ((server = gc_list)) {
392                 gc_list = server->gc_next;
393
394                 write_seqlock(&net->fs_lock);
395                 usage = 1;
396                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
397                 if (deleted) {
398                         rb_erase(&server->uuid_rb, &net->fs_servers);
399                         hlist_del_rcu(&server->proc_link);
400                 }
401                 write_sequnlock(&net->fs_lock);
402
403                 if (deleted) {
404                         write_seqlock(&net->fs_addr_lock);
405                         if (!hlist_unhashed(&server->addr4_link))
406                                 hlist_del_rcu(&server->addr4_link);
407                         if (!hlist_unhashed(&server->addr6_link))
408                                 hlist_del_rcu(&server->addr6_link);
409                         write_sequnlock(&net->fs_addr_lock);
410                         afs_destroy_server(net, server);
411                 }
412         }
413 }
414
415 /*
416  * Manage the records of servers known to be within a network namespace.  This
417  * includes garbage collecting unused servers.
418  *
419  * Note also that we were given an increment on net->servers_outstanding by
420  * whoever queued us that we need to deal with before returning.
421  */
422 void afs_manage_servers(struct work_struct *work)
423 {
424         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
425         struct afs_server *gc_list = NULL;
426         struct rb_node *cursor;
427         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
428         bool purging = !net->live;
429
430         _enter("");
431
432         /* Trawl the server list looking for servers that have expired from
433          * lack of use.
434          */
435         read_seqlock_excl(&net->fs_lock);
436
437         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
438                 struct afs_server *server =
439                         rb_entry(cursor, struct afs_server, uuid_rb);
440                 int usage = atomic_read(&server->usage);
441
442                 _debug("manage %pU %u", &server->uuid, usage);
443
444                 ASSERTCMP(usage, >=, 1);
445                 ASSERTIFCMP(purging, usage, ==, 1);
446
447                 if (usage == 1) {
448                         time64_t expire_at = server->put_time;
449
450                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
451                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
452                                 expire_at += afs_server_gc_delay;
453                         if (purging || expire_at <= now) {
454                                 server->gc_next = gc_list;
455                                 gc_list = server;
456                         } else if (expire_at < next_manage) {
457                                 next_manage = expire_at;
458                         }
459                 }
460         }
461
462         read_sequnlock_excl(&net->fs_lock);
463
464         /* Update the timer on the way out.  We have to pass an increment on
465          * servers_outstanding in the namespace that we are in to the timer or
466          * the work scheduler.
467          */
468         if (!purging && next_manage < TIME64_MAX) {
469                 now = ktime_get_real_seconds();
470
471                 if (next_manage - now <= 0) {
472                         if (queue_work(afs_wq, &net->fs_manager))
473                                 afs_inc_servers_outstanding(net);
474                 } else {
475                         afs_set_server_timer(net, next_manage - now);
476                 }
477         }
478
479         afs_gc_servers(net, gc_list);
480
481         afs_dec_servers_outstanding(net);
482         _leave(" [%d]", atomic_read(&net->servers_outstanding));
483 }
484
485 static void afs_queue_server_manager(struct afs_net *net)
486 {
487         afs_inc_servers_outstanding(net);
488         if (!queue_work(afs_wq, &net->fs_manager))
489                 afs_dec_servers_outstanding(net);
490 }
491
492 /*
493  * Purge list of servers.
494  */
495 void afs_purge_servers(struct afs_net *net)
496 {
497         _enter("");
498
499         if (del_timer_sync(&net->fs_timer))
500                 atomic_dec(&net->servers_outstanding);
501
502         afs_queue_server_manager(net);
503
504         _debug("wait");
505         wait_var_event(&net->servers_outstanding,
506                        !atomic_read(&net->servers_outstanding));
507         _leave("");
508 }
509
/*
 * Probe a fileserver to find its capabilities.
 *
 * The server's addresses are tried in turn, starting from the address list's
 * current index.  Unreachable/refused/timed-out addresses are skipped; any
 * other failure ends the probe with the error left in fc->ac.error.  On
 * success, a server flagged as YFS has every address switched to the YFS
 * service ID and the server is marked as probed.
 *
 * Returns true if a probe succeeded, false otherwise.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;
	int i;

	_enter("");

	ac->addr = NULL;
	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);
		switch (ac->error) {
		case 0:
			goto probed;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* This address is no good; move on to the next. */
			continue;

		case -ECONNABORTED:
			ac->error = afs_abort_to_error(ac->abort_code);
			goto failed;

		case -ENOMEM:
		case -ENONET:
			goto failed;

		default:
			ac->error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
			goto failed;
		}
	}

failed:
	afs_end_cursor(ac);
	return false;

probed:
	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
		for (i = 0; i < ac->alist->nr_addrs; i++)
			ac->alist->addrs[i].srx_service = YFS_FS_SERVICE;
	}
	afs_end_cursor(ac);
	set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
	return true;
}
562
563 /*
564  * If we haven't already, try probing the fileserver to get its capabilities.
565  * We try not to instigate parallel probes, but it's possible that the parallel
566  * probes will fail due to authentication failure when ours would succeed.
567  *
568  * TODO: Try sending an anonymous probe if an authenticated probe fails.
569  */
570 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
571 {
572         bool success;
573         int ret, retries = 0;
574
575         _enter("");
576
577 retry:
578         if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
579                 _leave(" = t");
580                 return true;
581         }
582
583         if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
584                 success = afs_do_probe_fileserver(fc);
585                 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
586                 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
587                 _leave(" = t");
588                 return success;
589         }
590
591         _debug("wait");
592         ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
593                           TASK_INTERRUPTIBLE);
594         if (ret == -ERESTARTSYS) {
595                 fc->ac.error = ret;
596                 _leave(" = f [%d]", ret);
597                 return false;
598         }
599
600         retries++;
601         if (retries == 4) {
602                 fc->ac.error = -ESTALE;
603                 _leave(" = f [stale]");
604                 return false;
605         }
606         _debug("retry");
607         goto retry;
608 }
609
610 /*
611  * Get an update for a server's address list.
612  */
613 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
614 {
615         struct afs_addr_list *alist, *discard;
616
617         _enter("");
618
619         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
620                                     &server->uuid);
621         if (IS_ERR(alist)) {
622                 fc->ac.error = PTR_ERR(alist);
623                 _leave(" = f [%d]", fc->ac.error);
624                 return false;
625         }
626
627         discard = alist;
628         if (server->addr_version != alist->version) {
629                 write_lock(&server->fs_lock);
630                 discard = rcu_dereference_protected(server->addresses,
631                                                     lockdep_is_held(&server->fs_lock));
632                 rcu_assign_pointer(server->addresses, alist);
633                 server->addr_version = alist->version;
634                 write_unlock(&server->fs_lock);
635         }
636
637         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
638         afs_put_addrlist(discard);
639         _leave(" = t");
640         return true;
641 }
642
643 /*
644  * See if a server's address list needs updating.
645  */
646 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
647 {
648         time64_t now = ktime_get_real_seconds();
649         long diff;
650         bool success;
651         int ret, retries = 0;
652
653         _enter("");
654
655         ASSERT(server);
656
657 retry:
658         diff = READ_ONCE(server->update_at) - now;
659         if (diff > 0) {
660                 _leave(" = t [not now %ld]", diff);
661                 return true;
662         }
663
664         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
665                 success = afs_update_server_record(fc, server);
666                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
667                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
668                 _leave(" = %d", success);
669                 return success;
670         }
671
672         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
673                           TASK_INTERRUPTIBLE);
674         if (ret == -ERESTARTSYS) {
675                 fc->ac.error = ret;
676                 _leave(" = f [intr]");
677                 return false;
678         }
679
680         retries++;
681         if (retries == 4) {
682                 _leave(" = f [stale]");
683                 ret = -ESTALE;
684                 return false;
685         }
686         goto retry;
687 }