asedeno.scripts.mit.edu Git - linux.git/commitdiff
afs: Fix missing net error handling
author David Howells <dhowells@redhat.com>
Tue, 13 Nov 2018 23:20:28 +0000 (23:20 +0000)
committer Al Viro <viro@zeniv.linux.org.uk>
Fri, 30 Nov 2018 02:08:14 +0000 (21:08 -0500)
kAFS can be given certain network errors (EADDRNOTAVAIL, EHOSTDOWN and
ERFKILL) that it doesn't handle in its server/address rotation algorithms.
They cause the probing and rotation to abort immediately rather than
rotating.

Fix this by:

 (1) Abstracting out the error prioritisation from the VL and FS rotation
     algorithms into a common function and expanding its usage into the
     server probing code.

     When multiple errors are available, this code selects the one we'd
     prefer to return.

 (2) Adding handling for EADDRNOTAVAIL, EHOSTDOWN and ERFKILL.

Fixes: 0fafdc9f888b ("afs: Fix file locking")
Fixes: 0338747d8454 ("afs: Probe multiple fileservers simultaneously")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/afs/fs_probe.c
fs/afs/internal.h
fs/afs/misc.c
fs/afs/rotate.c
fs/afs/vl_probe.c
fs/afs/vl_rotate.c

index d049cb4597425484d444be5e2d0b7b81b13fb116..fde6b4d4121e38532ea9ade21feb40b0a2bd09c6 100644 (file)
@@ -61,8 +61,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
                afs_io_error(call, afs_io_error_fs_probe_fail);
                goto out;
        case -ECONNRESET: /* Responded, but call expired. */
+       case -ERFKILL:
+       case -EADDRNOTAVAIL:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
+       case -EHOSTDOWN:
        case -ECONNREFUSED:
        case -ETIMEDOUT:
        case -ETIME:
@@ -132,12 +135,14 @@ void afs_fileserver_probe_result(struct afs_call *call)
 static int afs_do_probe_fileserver(struct afs_net *net,
                                   struct afs_server *server,
                                   struct key *key,
-                                  unsigned int server_index)
+                                  unsigned int server_index,
+                                  struct afs_error *_e)
 {
        struct afs_addr_cursor ac = {
                .index = 0,
        };
-       int ret;
+       bool in_progress = false;
+       int err;
 
        _enter("%pU", &server->uuid);
 
@@ -151,15 +156,17 @@ static int afs_do_probe_fileserver(struct afs_net *net,
        server->probe.rtt = UINT_MAX;
 
        for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-               ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+               err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
                                              true);
-               if (ret != -EINPROGRESS) {
-                       afs_fs_probe_done(server);
-                       return ret;
-               }
+               if (err == -EINPROGRESS)
+                       in_progress = true;
+               else
+                       afs_prioritise_error(_e, err, ac.abort_code);
        }
 
-       return 0;
+       if (!in_progress)
+               afs_fs_probe_done(server);
+       return in_progress;
 }
 
 /*
@@ -169,21 +176,23 @@ int afs_probe_fileservers(struct afs_net *net, struct key *key,
                          struct afs_server_list *list)
 {
        struct afs_server *server;
-       int i, ret;
+       struct afs_error e;
+       bool in_progress = false;
+       int i;
 
+       e.error = 0;
+       e.responded = false;
        for (i = 0; i < list->nr_servers; i++) {
                server = list->servers[i].server;
                if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
                        continue;
 
-               if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
-                       ret = afs_do_probe_fileserver(net, server, key, i);
-                       if (ret)
-                               return ret;
-               }
+               if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
+                   afs_do_probe_fileserver(net, server, key, i, &e))
+                       in_progress = true;
        }
 
-       return 0;
+       return in_progress ? 0 : e.error;
 }
 
 /*
index 5da3b09b751867bc9c0bbb8c23b362fbab942a93..8871b9e8645f15ce0963745813c6de62cc8ea786 100644 (file)
@@ -695,6 +695,14 @@ struct afs_interface {
        unsigned        mtu;            /* MTU of interface */
 };
 
+/*
+ * Error prioritisation and accumulation.
+ */
+struct afs_error {
+       short   error;                  /* Accumulated error */
+       bool    responded;              /* T if server responded */
+};
+
 /*
  * Cursor for iterating over a server's address list.
  */
@@ -1015,6 +1023,7 @@ static inline void __afs_stat(atomic_t *s)
  * misc.c
  */
 extern int afs_abort_to_error(u32);
+extern void afs_prioritise_error(struct afs_error *, int, u32);
 
 /*
  * mntpt.c
index 700a5fa7f4ece2151c68f87ee58bd6d2e8e97dc3..bbb1fd51b019ead4d6cd5aeee6b26861dd73fe04 100644 (file)
@@ -118,3 +118,55 @@ int afs_abort_to_error(u32 abort_code)
        default:                return -EREMOTEIO;
        }
 }
+
+/*
+ * Select the error to report from a set of errors.
+ */
+void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+{
+       switch (error) {
+       case 0:
+               return;
+       default:
+               if (e->error == -ETIMEDOUT ||
+                   e->error == -ETIME)
+                       return;
+       case -ETIMEDOUT:
+       case -ETIME:
+               if (e->error == -ENOMEM ||
+                   e->error == -ENONET)
+                       return;
+       case -ENOMEM:
+       case -ENONET:
+               if (e->error == -ERFKILL)
+                       return;
+       case -ERFKILL:
+               if (e->error == -EADDRNOTAVAIL)
+                       return;
+       case -EADDRNOTAVAIL:
+               if (e->error == -ENETUNREACH)
+                       return;
+       case -ENETUNREACH:
+               if (e->error == -EHOSTUNREACH)
+                       return;
+       case -EHOSTUNREACH:
+               if (e->error == -EHOSTDOWN)
+                       return;
+       case -EHOSTDOWN:
+               if (e->error == -ECONNREFUSED)
+                       return;
+       case -ECONNREFUSED:
+               if (e->error == -ECONNRESET)
+                       return;
+       case -ECONNRESET: /* Responded, but call expired. */
+               if (e->responded)
+                       return;
+               e->error = error;
+               return;
+
+       case -ECONNABORTED:
+               e->responded = true;
+               e->error = afs_abort_to_error(abort_code);
+               return;
+       }
+}
index 00504254c1c24b6186ec676edcfd1fdcdfffbc63..c3ae324781f846b8122b6b0a80085efaafdcaaba 100644 (file)
@@ -136,7 +136,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
        struct afs_addr_list *alist;
        struct afs_server *server;
        struct afs_vnode *vnode = fc->vnode;
-       u32 rtt, abort_code;
+       struct afs_error e;
+       u32 rtt;
        int error = fc->ac.error, i;
 
        _enter("%lx[%d],%lx[%d],%d,%d",
@@ -306,8 +307,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
                if (fc->error != -EDESTADDRREQ)
                        goto iterate_address;
                /* Fall through */
+       case -ERFKILL:
+       case -EADDRNOTAVAIL:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
+       case -EHOSTDOWN:
        case -ECONNREFUSED:
                _debug("no conn");
                fc->error = error;
@@ -446,50 +450,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
        if (fc->flags & AFS_FS_CURSOR_VBUSY)
                goto restart_from_beginning;
 
-       abort_code = 0;
-       error = -EDESTADDRREQ;
+       e.error = -EDESTADDRREQ;
+       e.responded = false;
        for (i = 0; i < fc->server_list->nr_servers; i++) {
                struct afs_server *s = fc->server_list->servers[i].server;
-               int probe_error = READ_ONCE(s->probe.error);
 
-               switch (probe_error) {
-               case 0:
-                       continue;
-               default:
-                       if (error == -ETIMEDOUT ||
-                           error == -ETIME)
-                               continue;
-               case -ETIMEDOUT:
-               case -ETIME:
-                       if (error == -ENOMEM ||
-                           error == -ENONET)
-                               continue;
-               case -ENOMEM:
-               case -ENONET:
-                       if (error == -ENETUNREACH)
-                               continue;
-               case -ENETUNREACH:
-                       if (error == -EHOSTUNREACH)
-                               continue;
-               case -EHOSTUNREACH:
-                       if (error == -ECONNREFUSED)
-                               continue;
-               case -ECONNREFUSED:
-                       if (error == -ECONNRESET)
-                               continue;
-               case -ECONNRESET: /* Responded, but call expired. */
-                       if (error == -ECONNABORTED)
-                               continue;
-               case -ECONNABORTED:
-                       abort_code = s->probe.abort_code;
-                       error = probe_error;
-                       continue;
-               }
+               afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+                                    s->probe.abort_code);
        }
 
-       if (error == -ECONNABORTED)
-               error = afs_abort_to_error(abort_code);
-
 failed_set_error:
        fc->error = error;
 failed:
@@ -553,8 +522,11 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
                _leave(" = f [abort]");
                return false;
 
+       case -ERFKILL:
+       case -EADDRNOTAVAIL:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
+       case -EHOSTDOWN:
        case -ECONNREFUSED:
        case -ETIMEDOUT:
        case -ETIME:
@@ -633,6 +605,7 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
        struct afs_net *net = afs_v2net(fc->vnode);
 
        if (fc->error == -EDESTADDRREQ ||
+           fc->error == -EADDRNOTAVAIL ||
            fc->error == -ENETUNREACH ||
            fc->error == -EHOSTUNREACH)
                afs_dump_edestaddrreq(fc);
index c0f616bd70cba33f292e7c7cbb9d3c4e3e310f21..f0b032976487cd5bde632b171ebaf858eb11a52c 100644 (file)
@@ -61,8 +61,11 @@ void afs_vlserver_probe_result(struct afs_call *call)
                afs_io_error(call, afs_io_error_vl_probe_fail);
                goto out;
        case -ECONNRESET: /* Responded, but call expired. */
+       case -ERFKILL:
+       case -EADDRNOTAVAIL:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
+       case -EHOSTDOWN:
        case -ECONNREFUSED:
        case -ETIMEDOUT:
        case -ETIME:
@@ -129,15 +132,17 @@ void afs_vlserver_probe_result(struct afs_call *call)
  * Probe all of a vlserver's addresses to find out the best route and to
  * query its capabilities.
  */
-static int afs_do_probe_vlserver(struct afs_net *net,
-                                struct afs_vlserver *server,
-                                struct key *key,
-                                unsigned int server_index)
+static bool afs_do_probe_vlserver(struct afs_net *net,
+                                 struct afs_vlserver *server,
+                                 struct key *key,
+                                 unsigned int server_index,
+                                 struct afs_error *_e)
 {
        struct afs_addr_cursor ac = {
                .index = 0,
        };
-       int ret;
+       bool in_progress = false;
+       int err;
 
        _enter("%s", server->name);
 
@@ -151,15 +156,17 @@ static int afs_do_probe_vlserver(struct afs_net *net,
        server->probe.rtt = UINT_MAX;
 
        for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-               ret = afs_vl_get_capabilities(net, &ac, key, server,
+               err = afs_vl_get_capabilities(net, &ac, key, server,
                                              server_index, true);
-               if (ret != -EINPROGRESS) {
-                       afs_vl_probe_done(server);
-                       return ret;
-               }
+               if (err == -EINPROGRESS)
+                       in_progress = true;
+               else
+                       afs_prioritise_error(_e, err, ac.abort_code);
        }
 
-       return 0;
+       if (!in_progress)
+               afs_vl_probe_done(server);
+       return in_progress;
 }
 
 /*
@@ -169,21 +176,23 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
                       struct afs_vlserver_list *vllist)
 {
        struct afs_vlserver *server;
-       int i, ret;
+       struct afs_error e;
+       bool in_progress = false;
+       int i;
 
+       e.error = 0;
+       e.responded = false;
        for (i = 0; i < vllist->nr_servers; i++) {
                server = vllist->servers[i].server;
                if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
                        continue;
 
-               if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
-                       ret = afs_do_probe_vlserver(net, server, key, i);
-                       if (ret)
-                               return ret;
-               }
+               if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
+                   afs_do_probe_vlserver(net, server, key, i, &e))
+                       in_progress = true;
        }
 
-       return 0;
+       return in_progress ? 0 : e.error;
 }
 
 /*
index b64a284b99d272e46a75d35bad5514580e90a132..7adde83a06482b56c555c18c3629c335a2315397 100644 (file)
@@ -71,8 +71,9 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 {
        struct afs_addr_list *alist;
        struct afs_vlserver *vlserver;
+       struct afs_error e;
        u32 rtt;
-       int error = vc->ac.error, abort_code, i;
+       int error = vc->ac.error, i;
 
        _enter("%lx[%d],%lx[%d],%d,%d",
               vc->untried, vc->index,
@@ -119,8 +120,11 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
                        goto failed;
                }
 
+       case -ERFKILL:
+       case -EADDRNOTAVAIL:
        case -ENETUNREACH:
        case -EHOSTUNREACH:
+       case -EHOSTDOWN:
        case -ECONNREFUSED:
        case -ETIMEDOUT:
        case -ETIME:
@@ -235,50 +239,15 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
        if (vc->flags & AFS_VL_CURSOR_RETRY)
                goto restart_from_beginning;
 
-       abort_code = 0;
-       error = -EDESTADDRREQ;
+       e.error = -EDESTADDRREQ;
+       e.responded = false;
        for (i = 0; i < vc->server_list->nr_servers; i++) {
                struct afs_vlserver *s = vc->server_list->servers[i].server;
-               int probe_error = READ_ONCE(s->probe.error);
 
-               switch (probe_error) {
-               case 0:
-                       continue;
-               default:
-                       if (error == -ETIMEDOUT ||
-                           error == -ETIME)
-                               continue;
-               case -ETIMEDOUT:
-               case -ETIME:
-                       if (error == -ENOMEM ||
-                           error == -ENONET)
-                               continue;
-               case -ENOMEM:
-               case -ENONET:
-                       if (error == -ENETUNREACH)
-                               continue;
-               case -ENETUNREACH:
-                       if (error == -EHOSTUNREACH)
-                               continue;
-               case -EHOSTUNREACH:
-                       if (error == -ECONNREFUSED)
-                               continue;
-               case -ECONNREFUSED:
-                       if (error == -ECONNRESET)
-                               continue;
-               case -ECONNRESET: /* Responded, but call expired. */
-                       if (error == -ECONNABORTED)
-                               continue;
-               case -ECONNABORTED:
-                       abort_code = s->probe.abort_code;
-                       error = probe_error;
-                       continue;
-               }
+               afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+                                    s->probe.abort_code);
        }
 
-       if (error == -ECONNABORTED)
-               error = afs_abort_to_error(abort_code);
-
 failed_set_error:
        vc->error = error;
 failed:
@@ -341,6 +310,7 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
        struct afs_net *net = vc->cell->net;
 
        if (vc->error == -EDESTADDRREQ ||
+           vc->error == -EADDRNOTAVAIL ||
            vc->error == -ENETUNREACH ||
            vc->error == -EHOSTUNREACH)
                afs_vl_dump_edestaddrreq(vc);